diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..61ab8ba --- /dev/null +++ b/.gitignore @@ -0,0 +1,90 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# IPython Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# dotenv +.env + +# virtualenv +venv/ +ENV/ + +# Spyder project settings +.spyderproject + +# Rope project settings +.ropeproject + diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..0d65e17 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 Tomas Hodan + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..629b778 --- /dev/null +++ b/README.md @@ -0,0 +1,48 @@ +# BOP Toolkit + +A Python toolkit of the BOP benchmark for 6D object pose estimation +(http://bop.felk.cvut.cz). + +- **bop_toolkit_lib** - The core Python library for i/o operations, calculation + of pose errors, Python based rendering etc. +- **docs** - Documentation and conventions. +- **scripts** - Scripts for evaluation, rendering of training images, + visualization of 6D object poses etc. + +## Installation + +### Python Dependencies + +To install the required python libraries, run: +``` +pip install -r requirements.txt +``` + +In the case of problems, try to first run: ```pip install --upgrade pip setuptools``` + +### Python Renderer + +The Python based renderer is implemented using +[Glumpy](https://glumpy.github.io/) which depends on +[freetype](https://www.freetype.org/) and [GLFW](https://www.glfw.org/). +Glumpy is installed using the pip command above. 
On Linux, freetype and GLFW can +be installed by: + +``` +apt-get install freetype +apt-get install libglfw3 +``` + +To install freetype and GLFW on Windows, follow [these instructions](https://glumpy.readthedocs.io/en/latest/installation.html#step-by-step-install-for-x64-bit-windows-7-8-and-10). + +GLFW serves as a backend of Glumpy. [Another backends](https://glumpy.readthedocs.io/en/latest/api/app-backends.html) +can be used but were not tested with our code. + +### C++ Renderer + +To speed up rendering, we recommend installing [bop_renderer](https://github.com/thodan/bop_renderer), +an off-screen C++ renderer with Python bindings. + +See *scripts/eval_calc_errors.py* for an example on how to use the Python and +C++ renderers - you can switch between them by setting *renderer_type* to +*'python'* or *'cpp'*. diff --git a/bop_toolkit_lib/__init__.py b/bop_toolkit_lib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bop_toolkit_lib/colors.json b/bop_toolkit_lib/colors.json new file mode 100644 index 0000000..1d0254d --- /dev/null +++ b/bop_toolkit_lib/colors.json @@ -0,0 +1,32 @@ +[ + [0.89, 0.28, 0.13], + [0.45, 0.38, 0.92], + [0.35, 0.73, 0.63], + [0.62, 0.28, 0.91], + [0.65, 0.71, 0.22], + [0.8, 0.29, 0.89], + [0.27, 0.55, 0.22], + [0.37, 0.46, 0.84], + [0.84, 0.63, 0.22], + [0.68, 0.29, 0.71], + [0.48, 0.75, 0.48], + [0.88, 0.27, 0.75], + [0.82, 0.45, 0.2], + [0.86, 0.27, 0.27], + [0.52, 0.49, 0.18], + [0.33, 0.67, 0.25], + [0.67, 0.42, 0.29], + [0.67, 0.46, 0.86], + [0.36, 0.72, 0.84], + [0.85, 0.29, 0.4], + [0.24, 0.53, 0.55], + [0.85, 0.55, 0.8], + [0.4, 0.51, 0.33], + [0.56, 0.38, 0.63], + [0.78, 0.66, 0.46], + [0.33, 0.5, 0.72], + [0.83, 0.31, 0.56], + [0.56, 0.61, 0.85], + [0.89, 0.58, 0.57], + [0.67, 0.4, 0.49] +] \ No newline at end of file diff --git a/bop_toolkit_lib/config.py b/bop_toolkit_lib/config.py new file mode 100644 index 0000000..e8d0741 --- /dev/null +++ b/bop_toolkit_lib/config.py @@ -0,0 +1,23 @@ +# Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) +# Center for Machine Perception, Czech Technical University in Prague + +"""Configuration of the BOP Toolkit.""" + + +# Folder with the BOP datasets. +datasets_path = r'/path/to/bop/datasets' + +# Folder for outputs (e.g. visualizations). +output_path = r'/path/to/output/folder' + +# Folder with results to be evaluated. +results_path = r'/path/to/folder/with/results' + +# Folder for the calculated pose errors and performance scores. +eval_path = r'/path/to/eval/folder' + +# Path to the build folder of bop_renderer (github.com/thodan/bop_renderer). +bop_renderer_path = r'/path/to/bop_renderer/build' + +# Executable of the MeshLab server. +meshlab_server_path = r'/path/to/meshlabserver.exe' diff --git a/bop_toolkit_lib/dataset_params.py b/bop_toolkit_lib/dataset_params.py new file mode 100644 index 0000000..1307b5c --- /dev/null +++ b/bop_toolkit_lib/dataset_params.py @@ -0,0 +1,321 @@ +# Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) +# Center for Machine Perception, Czech Technical University in Prague + +"""Parameters of the BOP datasets.""" + +import math +from os.path import join + +from bop_toolkit_lib import inout + + +def get_camera_params(datasets_path, dataset_name, cam_type=None): + """Return camera parameters for the specified dataset. + + Note that parameters returned by this functions are meant only for simulation + of the used sensor when rendering training images. 
To get per-image camera + parameters (which may vary), use path template 'scene_camera_tpath' contained + in the dictionary returned by function get_split_params. + + :param datasets_path: Path to a folder with datasets. + :param dataset_name: Name of the dataset for which to return the parameters. + :param cam_type: Type of camera. + :return: Dictionary with camera parameters for the specified dataset. + """ + # T-LESS includes images captured by three sensors. + if dataset_name == 'tless': + + # Use images from the Primesense sensor as default. + if cam_type is None: + cam_type = 'primesense' + cam_filename = 'camera_{}.json'.format(cam_type) + + else: + cam_filename = 'camera.json' + + # Path to the camera file. + cam_params_path = join(datasets_path, dataset_name, cam_filename) + + p = { + # Path to a file with camera parameters. + 'cam_params_path': cam_params_path, + } + + # Add a dictionary containing the intrinsic camera matrix ('K'), image size + # ('im_size'), and scale of the depth images ('depth_scale', optional). + p.update(inout.load_cam_params(cam_params_path)) + + return p + + +def get_model_params(datasets_path, dataset_name, model_type=None): + """Return parameters of object models for the specified dataset. + + :param datasets_path: Path to a folder with datasets. + :param dataset_name: Name of the dataset for which to return the parameters. + :param model_type: Type of object models. + :return: Dictionary with object model parameters for the specified dataset. + """ + # Object ID's. + obj_ids = { + 'lm': range(1, 16), + 'lmo': [1, 5, 6, 8, 9, 10, 11, 12], + 'tless': range(1, 31), + 'tudl': range(1, 4), + 'tyol': range(1, 22), + 'ruapc': range(1, 15), + 'icmi': range(1, 7), + 'icbin': range(1, 3), + 'itodd': range(1, 29), + 'hb': [1, 3, 4, 8, 9, 10, 12, 15, 17, 18, 19, 22, 23, 29, 32, 33], + # 'hb': range(1, 34), # Original HB dataset. + }[dataset_name] + + # ID's of objects with symmetries. + symmetric_obj_ids = { + 'lm': [3, 7, 10, 11], + 'lmo': [10, 11], + 'tless': range(1, 31), + 'tudl': [], + 'tyol': [3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21], + 'ruapc': [8, 9, 12, 13], + 'icmi': [1, 2, 6], + 'icbin': [1], + 'itodd': [2, 3, 4, 5, 7, 8, 9, 11, 12, 14, 17, 18, 19, 23, 24, 25, 27, 28], + 'hb': [6, 10, 11, 12, 13, 14, 18, 24, 29], + }[dataset_name] + + # T-LESS includes two types of object models, CAD and reconstructed. + # Use the CAD models as default. + if dataset_name == 'tless' and model_type is None: + model_type = 'cad' + + # Name of the folder with object models. + models_folder_name = 'models' + if model_type is not None: + models_folder_name += '_' + model_type + + # Path to the folder with object models. + models_path = join(datasets_path, dataset_name, models_folder_name) + + p = { + # ID's of all objects included in the dataset. + 'obj_ids': obj_ids, + + # ID's of objects with symmetries. + 'symmetric_obj_ids': symmetric_obj_ids, + + # Path template to an object model file. + 'model_tpath': join(models_path, 'obj_{obj_id:06d}.ply'), + + # Path to a file with meta information about the object models. + 'models_info_path': join(models_path, 'models_info.json') + } + + return p + + +def get_split_params(datasets_path, dataset_name, split, split_type=None): + """Return parameters (camera params, paths etc.) for the specified dataset. + + :param datasets_path: Path to a folder with datasets. + :param dataset_name: Name of the dataset for which to return the parameters. + :param split: Name of the dataset split ('train', 'val', 'test'). 
+ :param split_type: Name of the split type (e.g. for T-LESS, possible types of + the 'train' split are: 'primesense', 'render_reconst'). + :return: Dictionary with parameters for the specified dataset split. + """ + p = { + 'name': dataset_name, + 'split': split, + 'split_type': split_type, + 'base_path': join(datasets_path, dataset_name), + + 'depth_range': None, + 'azimuth_range': None, + 'elev_range': None, + } + + gray_ext = '.png' + rgb_ext = '.png' + depth_ext = '.png' + + p['im_modalities'] = ['rgb', 'depth'] + + # Linemod (LM). + if dataset_name == 'lm': + p['scene_ids'] = range(1, 16) + p['im_size'] = (640, 480) + + if split == 'test': + p['depth_range'] = (600.90, 1102.35) + p['azimuth_range'] = (0, 2 * math.pi) + p['elev_range'] = (0, 0.5 * math.pi) + + # Linemod-Occluded (LM). + elif dataset_name == 'lmo': + p['scene_ids'] = {'train': [1, 5, 6, 8, 9, 10, 11, 12], 'test': [2]}[split] + p['im_size'] = (640, 480) + + if split == 'test': + p['depth_range'] = (346.31, 1499.84) + p['azimuth_range'] = (0, 2 * math.pi) + p['elev_range'] = (0, 0.5 * math.pi) + + # T-LESS. + elif dataset_name == 'tless': + p['scene_ids'] = {'train': range(1, 31), 'test': range(1, 21)}[split] + + # Use images from the Primesense sensor by default. + if split_type is None: + split_type = 'primesense' + + p['im_size'] = { + 'train': { + 'primesense': (400, 400), + 'kinect': (400, 400), + 'canon': (1900, 1900), + 'render_reconst': (1280, 1024) + }, + 'test': { + 'primesense': (720, 540), + 'kinect': (720, 540), + 'canon': (2560, 1920) + } + }[split][split_type] + + # The following holds for Primesense, but is similar for the other sensors. + if split == 'test': + p['depth_range'] = (649.89, 940.04) + p['azimuth_range'] = (0, 2 * math.pi) + p['elev_range'] = (-0.5 * math.pi, 0.5 * math.pi) + + # TU Dresden Light (TUD-L). + elif dataset_name == 'tudl': + if split == 'train' and split_type is None: + split_type = 'render' + + p['scene_ids'] = range(1, 4) + p['im_size'] = (640, 480) + + if split == 'test': + p['depth_range'] = (851.29, 2016.14) + p['azimuth_range'] = (0, 2 * math.pi) + p['elev_range'] = (-0.4363, 0.5 * math.pi) # (-25, 90) [deg]. + + # Toyota Light (TYO-L). + elif dataset_name == 'tyol': + p['scene_ids'] = range(1, 22) + p['im_size'] = (640, 480) + + if split == 'test': + p['depth_range'] = (499.57, 1246.07) + p['azimuth_range'] = (0, 2 * math.pi) + p['elev_range'] = (-0.5 * math.pi, 0.5 * math.pi) + + # Rutgers APC (RU-APC). + elif dataset_name == 'ruapc': + p['scene_ids'] = range(1, 15) + p['im_size'] = (640, 480) + + if split == 'test': + p['depth_range'] = (594.41, 739.12) + p['azimuth_range'] = (0, 2 * math.pi) + p['elev_range'] = (-0.5 * math.pi, 0.5 * math.pi) + + # Tejani et al. (IC-MI). + elif dataset_name == 'icmi': + p['scene_ids'] = range(1, 7) + p['im_size'] = (640, 480) + + if split == 'test': + p['depth_range'] = (509.12, 1120.41) + p['azimuth_range'] = (0, 2 * math.pi) + p['elev_range'] = (0, 0.5 * math.pi) + + # Doumanoglou et al. (IC-BIN). + elif dataset_name == 'icbin': + p['scene_ids'] = {'train': range(1, 3), 'test': range(1, 4)}[split] + p['im_size'] = (640, 480) + + if split == 'test': + p['depth_range'] = (454.56, 1076.29) + p['azimuth_range'] = (0, 2 * math.pi) + p['elev_range'] = (-1.0297, 0.5 * math.pi) # (-59, 90) [deg]. + + # MVTec ITODD. 
+ elif dataset_name == 'itodd': + p['scene_ids'] = { + 'train': [], + 'val': [1], + 'test': [1] + }[split] + p['im_size'] = (1280, 960) + + gray_ext = '.tif' + depth_ext = '.tif' + p['im_modalities'] = ['gray', 'depth'] + + if split == 'test': + p['depth_range'] = None + p['azimuth_range'] = None + p['elev_range'] = None + + # HomebrewedDB (HB). + elif dataset_name == 'hb': + p['scene_ids'] = { + 'train': [], + 'val': [3, 5, 13], + 'test': [3, 5, 13], + }[split] + p['im_size'] = (640, 480) + + if split == 'test': + p['depth_range'] = (420.0, 1430.0) + p['azimuth_range'] = (0, 2 * math.pi) + p['elev_range'] = (0.1920, 1.5184) # (11, 87) [deg]. + + else: + raise ValueError('Unknown BOP dataset.') + + base_path = join(datasets_path, dataset_name) + split_path = join(base_path, split) + if split_type is not None: + split_path += '_' + split_type + + p.update({ + # Path template to a file with per-image camera parameters. + 'scene_camera_tpath': join( + split_path, '{scene_id:06d}', 'scene_camera.json'), + + # Path template to a file with GT annotations. + 'scene_gt_tpath': join( + split_path, '{scene_id:06d}', 'scene_gt.json'), + + # Path template to a file with meta information about the GT annotations. + 'scene_gt_info_tpath': join( + split_path, '{scene_id:06d}', 'scene_gt_info.json'), + + # Path template to a gray image. + 'gray_tpath': join( + split_path, '{scene_id:06d}', 'gray', '{im_id:06d}' + gray_ext), + + # Path template to an RGB image. + 'rgb_tpath': join( + split_path, '{scene_id:06d}', 'rgb', '{im_id:06d}' + rgb_ext), + + # Path template to a depth image. + 'depth_tpath': join( + split_path, '{scene_id:06d}', 'depth', '{im_id:06d}' + depth_ext), + + # Path template to a mask of the full object silhouette. + 'mask_tpath': join( + split_path, '{scene_id:06d}', 'mask', '{im_id:06d}_{gt_id:06d}.png'), + + # Path template to a mask of the visible part of an object silhouette. + 'mask_visib_tpath': join( + split_path, '{scene_id:06d}', 'mask_visib', + '{im_id:06d}_{gt_id:06d}.png'), + }) + + return p diff --git a/bop_toolkit_lib/droid_sans_mono.ttf b/bop_toolkit_lib/droid_sans_mono.ttf new file mode 100644 index 0000000..a007071 Binary files /dev/null and b/bop_toolkit_lib/droid_sans_mono.ttf differ diff --git a/bop_toolkit_lib/droid_sans_mono_license.txt b/bop_toolkit_lib/droid_sans_mono_license.txt new file mode 100644 index 0000000..989e2c5 --- /dev/null +++ b/bop_toolkit_lib/droid_sans_mono_license.txt @@ -0,0 +1,201 @@ +Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
\ No newline at end of file diff --git a/bop_toolkit_lib/inout.py b/bop_toolkit_lib/inout.py new file mode 100644 index 0000000..ce74508 --- /dev/null +++ b/bop_toolkit_lib/inout.py @@ -0,0 +1,655 @@ +# Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) +# Center for Machine Perception, Czech Technical University in Prague + +"""I/O functions.""" + +import os +import struct +import numpy as np +import imageio +import png +import json + +from bop_toolkit_lib import misc + + +def load_im(path): + """Loads an image from a file. + + :param path: Path to the image file to load. + :return: ndarray with the loaded image. + """ + im = imageio.imread(path) + return im + + +def save_im(path, im, jpg_quality=95): + """Saves an image to a file. + + :param path: Path to the output image file. + :param im: ndarray with the image to save. + :param jpg_quality: Quality of the saved image (applies only to JPEG). + """ + ext = os.path.splitext(path)[1][1:] + if ext.lower() in ['jpg', 'jpeg']: + imageio.imwrite(path, im, quality=jpg_quality) + else: + imageio.imwrite(path, im) + + +def load_depth(path): + """Loads a depth image from a file. + + :param path: Path to the depth image file to load. + :return: ndarray with the loaded depth image. + """ + d = imageio.imread(path) + return d.astype(np.float32) + + +def save_depth(path, im): + """Saves a depth image (16-bit) to a PNG file. + + :param path: Path to the output depth image file. + :param im: ndarray with the depth image to save. + """ + if path.split('.')[-1].lower() != 'png': + raise ValueError('Only PNG format is currently supported.') + + im_uint16 = np.round(im).astype(np.uint16) + + # PyPNG library can save 16-bit PNG and is faster than imageio.imwrite(). + w_depth = png.Writer(im.shape[1], im.shape[0], greyscale=True, bitdepth=16) + with open(path, 'wb') as f: + w_depth.write(f, np.reshape(im_uint16, (-1, im.shape[1]))) + + +def load_json(path, keys_to_int=False): + """Loads content of a JSON file. + + :param path: Path to the JSON file. + :return: Content of the loaded JSON file. + """ + # Keys to integers. + def convert_keys_to_int(x): + return {int(k) if k.lstrip('-').isdigit() else k: v for k, v in x.items()} + + with open(path, 'r') as f: + if keys_to_int: + content = json.load(f, object_hook=lambda x: convert_keys_to_int(x)) + else: + content = json.load(f) + + return content + + +def save_json(path, content): + """Saves the provided content to a JSON file. + + :param path: Path to the output JSON file. + :param content: Dictionary/list to save. + """ + with open(path, 'w') as f: + + if isinstance(content, dict): + f.write('{\n') + content_sorted = sorted(content.items(), key=lambda x: x[0]) + for elem_id, (k, v) in enumerate(content_sorted): + f.write(' \"{}\": {}'.format(k, json.dumps(v, sort_keys=True))) + if elem_id != len(content) - 1: + f.write(',') + f.write('\n') + f.write('}') + + elif isinstance(content, list): + f.write('[\n') + for elem_id, elem in enumerate(content): + f.write(' {}'.format(json.dumps(elem, sort_keys=True))) + if elem_id != len(content) - 1: + f.write(',') + f.write('\n') + f.write(']') + + else: + json.dump(content, f, sort_keys=True) + + +def load_cam_params(path): + """Loads camera parameters from a JSON file. + + :param path: Path to the JSON file. + :return: Dictionary with the following items: + - 'im_size': (width, height). + - 'K': 3x3 intrinsic camera matrix. + - 'depth_scale': Scale factor to convert the depth images to mm (optional). 
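+
+  A minimal example of the expected JSON content (the keys match what this
+  function reads; the numeric values below are illustrative only):
+
+    {"cx": 320.0, "cy": 240.0, "fx": 572.4, "fy": 573.6,
+     "width": 640, "height": 480, "depth_scale": 0.1}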
+ """ + c = load_json(path) + + cam = { + 'im_size': (c['width'], c['height']), + 'K': np.array([[c['fx'], 0.0, c['cx']], + [0.0, c['fy'], c['cy']], + [0.0, 0.0, 1.0]]) + } + + if 'depth_scale' in c.keys(): + cam['depth_scale'] = float(c['depth_scale']) + + return cam + + +def load_scene_camera(path): + """Loads content of a JSON file with information about the scene camera. + + See docs/bop_datasets_format.md for details. + + :param path: Path to the JSON file. + :return: Dictionary with the loaded content. + """ + scene_camera = load_json(path, keys_to_int=True) + + for im_id in scene_camera.keys(): + if 'cam_K' in scene_camera[im_id].keys(): + scene_camera[im_id]['cam_K'] = \ + np.array(scene_camera[im_id]['cam_K'], np.float).reshape((3, 3)) + if 'cam_R_w2c' in scene_camera[im_id].keys(): + scene_camera[im_id]['cam_R_w2c'] = \ + np.array(scene_camera[im_id]['cam_R_w2c'], np.float).reshape((3, 3)) + if 'cam_t_w2c' in scene_camera[im_id].keys(): + scene_camera[im_id]['cam_t_w2c'] = \ + np.array(scene_camera[im_id]['cam_t_w2c'], np.float).reshape((3, 1)) + return scene_camera + + +def save_scene_camera(path, scene_camera): + """Saves information about the scene camera to a JSON file. + + See docs/bop_datasets_format.md for details. + + :param path: Path to the output JSON file. + :param scene_camera: Dictionary to save to the JSON file. + """ + for im_id in sorted(scene_camera.keys()): + im_camera = scene_camera[im_id] + if 'cam_K' in im_camera.keys(): + im_camera['cam_K'] = im_camera['cam_K'].flatten().tolist() + if 'cam_R_w2c' in im_camera.keys(): + im_camera['cam_R_w2c'] = im_camera['cam_R_w2c'].flatten().tolist() + if 'cam_t_w2c' in im_camera.keys(): + im_camera['cam_t_w2c'] = im_camera['cam_t_w2c'].flatten().tolist() + save_json(path, scene_camera) + + +def load_scene_gt(path): + """Loads content of a JSON file with ground-truth annotations. + + See docs/bop_datasets_format.md for details. + + :param path: Path to the JSON file. + :return: Dictionary with the loaded content. + """ + scene_gt = load_json(path, keys_to_int=True) + + for im_id, im_gt in scene_gt.items(): + for gt in im_gt: + if 'cam_R_m2c' in gt.keys(): + gt['cam_R_m2c'] = np.array(gt['cam_R_m2c'], np.float).reshape((3, 3)) + if 'cam_t_m2c' in gt.keys(): + gt['cam_t_m2c'] = np.array(gt['cam_t_m2c'], np.float).reshape((3, 1)) + return scene_gt + + +def save_scene_gt(path, scene_gt): + """Saves ground-truth annotations to a JSON file. + + See docs/bop_datasets_format.md for details. + + :param path: Path to the output JSON file. + :param scene_gt: Dictionary to save to the JSON file. + """ + for im_id in sorted(scene_gt.keys()): + im_gts = scene_gt[im_id] + for gt in im_gts: + if 'cam_R_m2c' in gt.keys(): + gt['cam_R_m2c'] = gt['cam_R_m2c'].flatten().tolist() + if 'cam_t_m2c' in gt.keys(): + gt['cam_t_m2c'] = gt['cam_t_m2c'].flatten().tolist() + if 'obj_bb' in gt.keys(): + gt['obj_bb'] = [int(x) for x in gt['obj_bb']] + save_json(path, scene_gt) + + +def load_bop_results(path, version='bop19'): + """Loads 6D object pose estimates from a file. + + :param path: Path to a file with pose estimates. + :param version: Version of the results. + :return: List of loaded poses. + """ + results = [] + + # See docs/bop_challenge_2019.md for details. 
+ if version == 'bop19': + header = 'scene_id,im_id,obj_id,score,R,t,time' + with open(path, 'r') as f: + line_id = 0 + for line in f: + line_id += 1 + if line_id == 1 and header in line: + continue + else: + elems = line.split(',') + if len(elems) != 7: + raise ValueError( + 'A line does not have 7 comma-sep. elements: {}'.format(line)) + + result = { + 'scene_id': int(elems[0]), + 'im_id': int(elems[1]), + 'obj_id': int(elems[2]), + 'score': float(elems[3]), + 'R': np.array( + map(float, elems[4].split()), np.float).reshape((3, 3)), + 't': np.array( + map(float, elems[5].split()), np.float).reshape((3, 1)), + 'time': float(elems[6]) + } + + results.append(result) + else: + raise ValueError('Unknown version of BOP results.') + + return results + + +def save_bop_results(path, results, version='bop19'): + """Saves 6D object pose estimates to a file. + + :param path: Path to the output file. + :param results: Dictionary with pose estimates. + :param version: Version of the results. + """ + # See docs/bop_challenge_2019.md for details. + if version == 'bop19': + lines = ['scene_id,im_id,obj_id,score,R,t,time'] + for res in results: + if 'time' in res: + run_time = res['time'] + else: + run_time = -1 + + lines.append('{scene_id},{im_id},{obj_id},{score},{R},{t},{time}'.format( + scene_id=res['scene_id'], + im_id=res['im_id'], + obj_id=res['obj_id'], + score=res['score'], + R=' '.join(map(str, res['R'].flatten().tolist())), + t=' '.join(map(str, res['t'].flatten().tolist())), + time=run_time)) + + with open(path, 'w') as f: + f.write('\n'.join(lines)) + + else: + raise ValueError('Unknown version of BOP results.') + + +def check_bop_results(path, version='bop19'): + """Checks if the format of BOP results is correct. + + :param result_filenames: Path to a file with pose estimates. + :param version: Version of the results. + :return: True if the format is correct, False if it is not correct. + """ + check_passed = True + check_msg = 'OK' + try: + results = load_bop_results(path, version) + + if version == 'bop19': + # Check if the time for all estimates from the same image are the same. + times = {} + for result in results: + result_key = '{:06d}_{:06d}'.format(result['scene_id'], result['im_id']) + if result_key in times: + if abs(times[result_key] - result['time']) > 0.001: + check_passed = False + check_msg = \ + 'The running time for scene {} and image {} is not the same for' \ + ' all estimates'.format(result['scene_id'], result['im_id']) + misc.log(check_msg) + break + else: + times[result_key] = result['time'] + + except Exception as e: + check_passed = False + check_msg = 'ERROR when loading file {}:\n{}'.format(path, e) + misc.log(check_msg) + + return check_passed, check_msg + + +def load_ply(path): + """Loads a 3D mesh model from a PLY file. + + :param path: Path to a PLY file. + :return: The loaded model given by a dictionary with items: + - 'pts' (nx3 ndarray) + - 'normals' (nx3 ndarray), optional + - 'colors' (nx3 ndarray), optional + - 'faces' (mx3 ndarray), optional + - 'texture_uv' (nx2 ndarray), optional + - 'texture_uv_face' (mx6 ndarray), optional + - 'texture_file' (string), optional + """ + f = open(path, 'rb') + + # Only triangular faces are supported. + face_n_corners = 3 + + n_pts = 0 + n_faces = 0 + pt_props = [] + face_props = [] + is_binary = False + header_vertex_section = False + header_face_section = False + texture_file = None + + # Read the header. + while True: + + # Strip the newline character(s). 
+ line = f.readline().rstrip('\n').rstrip('\r') + + if line.startswith('comment TextureFile'): + texture_file = line.split()[-1] + elif line.startswith('element vertex'): + n_pts = int(line.split()[-1]) + header_vertex_section = True + header_face_section = False + elif line.startswith('element face'): + n_faces = int(line.split()[-1]) + header_vertex_section = False + header_face_section = True + elif line.startswith('element'): # Some other element. + header_vertex_section = False + header_face_section = False + elif line.startswith('property') and header_vertex_section: + # (name of the property, data type) + pt_props.append((line.split()[-1], line.split()[-2])) + elif line.startswith('property list') and header_face_section: + elems = line.split() + if elems[-1] == 'vertex_indices' or elems[-1] == 'vertex_index': + # (name of the property, data type) + face_props.append(('n_corners', elems[2])) + for i in range(face_n_corners): + face_props.append(('ind_' + str(i), elems[3])) + elif elems[-1] == 'texcoord': + # (name of the property, data type) + face_props.append(('texcoord', elems[2])) + for i in range(face_n_corners * 2): + face_props.append(('texcoord_ind_' + str(i), elems[3])) + else: + misc.log('Warning: Not supported face property: ' + elems[-1]) + elif line.startswith('format'): + if 'binary' in line: + is_binary = True + elif line.startswith('end_header'): + break + + # Prepare data structures. + model = {} + if texture_file is not None: + model['texture_file'] = texture_file + model['pts'] = np.zeros((n_pts, 3), np.float) + if n_faces > 0: + model['faces'] = np.zeros((n_faces, face_n_corners), np.float) + + pt_props_names = [p[0] for p in pt_props] + face_props_names = [p[0] for p in face_props] + + is_normal = False + if {'nx', 'ny', 'nz'}.issubset(set(pt_props_names)): + is_normal = True + model['normals'] = np.zeros((n_pts, 3), np.float) + + is_color = False + if {'red', 'green', 'blue'}.issubset(set(pt_props_names)): + is_color = True + model['colors'] = np.zeros((n_pts, 3), np.float) + + is_texture_pt = False + if {'texture_u', 'texture_v'}.issubset(set(pt_props_names)): + is_texture_pt = True + model['texture_uv'] = np.zeros((n_pts, 2), np.float) + + is_texture_face = False + if {'texcoord'}.issubset(set(face_props_names)): + is_texture_face = True + model['texture_uv_face'] = np.zeros((n_faces, 6), np.float) + + # Formats for the binary case. + formats = { + 'float': ('f', 4), + 'double': ('d', 8), + 'int': ('i', 4), + 'uchar': ('B', 1) + } + + # Load vertices. 
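+  # In the ASCII case, each vertex line lists the declared properties in the
+  # order given in the header, e.g. for a model with positions, normals and
+  # colors (illustrative values):
+  #   -12.5 4.0 33.1 0.0 0.7 0.7 200 180 160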
+ for pt_id in range(n_pts): + prop_vals = {} + load_props = ['x', 'y', 'z', 'nx', 'ny', 'nz', + 'red', 'green', 'blue', 'texture_u', 'texture_v'] + if is_binary: + for prop in pt_props: + format = formats[prop[1]] + read_data = f.read(format[1]) + val = struct.unpack(format[0], read_data)[0] + if prop[0] in load_props: + prop_vals[prop[0]] = val + else: + elems = f.readline().rstrip('\n').rstrip('\r').split() + for prop_id, prop in enumerate(pt_props): + if prop[0] in load_props: + prop_vals[prop[0]] = elems[prop_id] + + model['pts'][pt_id, 0] = float(prop_vals['x']) + model['pts'][pt_id, 1] = float(prop_vals['y']) + model['pts'][pt_id, 2] = float(prop_vals['z']) + + if is_normal: + model['normals'][pt_id, 0] = float(prop_vals['nx']) + model['normals'][pt_id, 1] = float(prop_vals['ny']) + model['normals'][pt_id, 2] = float(prop_vals['nz']) + + if is_color: + model['colors'][pt_id, 0] = float(prop_vals['red']) + model['colors'][pt_id, 1] = float(prop_vals['green']) + model['colors'][pt_id, 2] = float(prop_vals['blue']) + + if is_texture_pt: + model['texture_uv'][pt_id, 0] = float(prop_vals['texture_u']) + model['texture_uv'][pt_id, 1] = float(prop_vals['texture_v']) + + # Load faces. + for face_id in range(n_faces): + prop_vals = {} + if is_binary: + for prop in face_props: + format = formats[prop[1]] + val = struct.unpack(format[0], f.read(format[1]))[0] + if prop[0] == 'n_corners': + if val != face_n_corners: + raise ValueError('Only triangular faces are supported.') + elif prop[0] == 'texcoord': + if val != face_n_corners * 2: + raise ValueError('Wrong number of UV face coordinates.') + else: + prop_vals[prop[0]] = val + else: + elems = f.readline().rstrip('\n').rstrip('\r').split() + for prop_id, prop in enumerate(face_props): + if prop[0] == 'n_corners': + if int(elems[prop_id]) != face_n_corners: + raise ValueError('Only triangular faces are supported.') + elif prop[0] == 'texcoord': + if int(elems[prop_id]) != face_n_corners * 2: + raise ValueError('Wrong number of UV face coordinates.') + else: + prop_vals[prop[0]] = elems[prop_id] + + model['faces'][face_id, 0] = int(prop_vals['ind_0']) + model['faces'][face_id, 1] = int(prop_vals['ind_1']) + model['faces'][face_id, 2] = int(prop_vals['ind_2']) + + if is_texture_face: + for i in range(6): + model['texture_uv_face'][face_id, i] = float( + prop_vals['texcoord_ind_{}'.format(i)]) + + f.close() + + return model + + +def save_ply(path, model, extra_header_comments=None): + """Saves a 3D mesh model to a PLY file. + + :param path: Path to a PLY file. + :param model: 3D model given by a dictionary with items: + - 'pts' (nx3 ndarray) + - 'normals' (nx3 ndarray, optional) + - 'colors' (nx3 ndarray, optional) + - 'faces' (mx3 ndarray, optional) + - 'texture_uv' (nx2 ndarray, optional) + - 'texture_uv_face' (mx6 ndarray, optional) + - 'texture_file' (string, optional) + :param extra_header_comments: Extra header comment (optional). 
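+
+  The 'model' dictionary uses the same keys as the one returned by load_ply;
+  this function only unpacks it and delegates to save_ply2.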
+ """ + pts = model['pts'] + pts_colors = model['colors'] if 'colors' in model.keys() else np.array([]) + pts_normals = model['normals'] if 'normals' in model.keys() else np.array([]) + faces = model['faces'] if 'faces' in model.keys() else np.array([]) + texture_uv = model[ + 'texture_uv'] if 'texture_uv' in model.keys() else np.array([]) + texture_uv_face = model[ + 'texture_uv_face'] if 'texture_uv_face' in model.keys() else np.array([]) + texture_file = model[ + 'texture_file'] if 'texture_file' in model.keys() else np.array([]) + + save_ply2(path, pts, pts_colors, pts_normals, faces, texture_uv, + texture_uv_face, + texture_file, extra_header_comments) + + +def save_ply2(path, pts, pts_colors=None, pts_normals=None, faces=None, + texture_uv=None, texture_uv_face=None, texture_file=None, + extra_header_comments=None): + """Saves a 3D mesh model to a PLY file. + + :param path: Path to the resulting PLY file. + :param pts: nx3 ndarray with vertices. + :param pts_colors: nx3 ndarray with vertex colors (optional). + :param pts_normals: nx3 ndarray with vertex normals (optional). + :param faces: mx3 ndarray with mesh faces (optional). + :param texture_uv: nx2 ndarray with per-vertex UV texture coordinates + (optional). + :param texture_uv_face: mx6 ndarray with per-face UV texture coordinates + (optional). + :param texture_file: Path to a texture image -- relative to the resulting + PLY file (optional). + :param extra_header_comments: Extra header comment (optional). + """ + pts_colors = np.array(pts_colors) + if pts_colors is not None: + assert (len(pts) == len(pts_colors)) + + valid_pts_count = 0 + for pt_id, pt in enumerate(pts): + if not np.isnan(np.sum(pt)): + valid_pts_count += 1 + + f = open(path, 'w') + f.write( + 'ply\n' + 'format ascii 1.0\n' + # 'format binary_little_endian 1.0\n' + ) + + if texture_file is not None: + f.write('comment TextureFile {}\n'.format(texture_file)) + + if extra_header_comments is not None: + for comment in extra_header_comments: + f.write('comment {}\n'.format(comment)) + + f.write( + 'element vertex ' + str(valid_pts_count) + '\n' + 'property float x\n' + 'property float y\n' + 'property float z\n' + ) + if pts_normals is not None: + f.write( + 'property float nx\n' + 'property float ny\n' + 'property float nz\n' + ) + if pts_colors is not None: + f.write( + 'property uchar red\n' + 'property uchar green\n' + 'property uchar blue\n' + ) + if texture_uv is not None: + f.write( + 'property float texture_u\n' + 'property float texture_v\n' + ) + if faces is not None: + f.write( + 'element face ' + str(len(faces)) + '\n' + 'property list uchar int vertex_indices\n' + ) + if texture_uv_face is not None: + f.write( + 'property list uchar float texcoord\n' + ) + f.write('end_header\n') + + format_float = "{:.4f}" + format_2float = " ".join((format_float for _ in range(2))) + format_3float = " ".join((format_float for _ in range(3))) + format_int = "{:d}" + format_3int = " ".join((format_int for _ in range(3))) + + # Save vertices. + for pt_id, pt in enumerate(pts): + if not np.isnan(np.sum(pt)): + f.write(format_3float.format(*pts[pt_id].astype(float))) + if pts_normals is not None: + f.write(' ') + f.write(format_3float.format(*pts_normals[pt_id].astype(float))) + if pts_colors is not None: + f.write(' ') + f.write(format_3int.format(*pts_colors[pt_id].astype(int))) + if texture_uv is not None: + f.write(' ') + f.write(format_2float.format(*texture_uv[pt_id].astype(float))) + f.write('\n') + + # Save faces. 
+ if faces is not None: + for face_id, face in enumerate(faces): + line = ' '.join(map(str, map(int, [len(face)] + list(face.squeeze())))) + if texture_uv_face is not None: + uv = texture_uv_face[face_id] + line += ' ' + ' '.join( + map(str, [len(uv)] + map(float, list(uv.squeeze())))) + f.write(line) + f.write('\n') + + f.close() diff --git a/bop_toolkit_lib/misc.py b/bop_toolkit_lib/misc.py new file mode 100644 index 0000000..a6922f0 --- /dev/null +++ b/bop_toolkit_lib/misc.py @@ -0,0 +1,378 @@ +# Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) +# Center for Machine Perception, Czech Technical University in Prague + +"""Miscellaneous functions.""" + +import os +import sys +import time +import math +import subprocess +import numpy as np +from scipy.spatial import distance + +from bop_toolkit_lib import transform + + +def log(s): + """A logging function. + + :param s: String to print (with the current date and time). + """ + sys.stdout.write('{}: {}\n'.format(time.strftime('%m/%d|%H:%M:%S'), s)) + sys.stdout.flush() + + +def ensure_dir(path): + """Ensures that the specified directory exists. + + :param path: Path to the directory. + """ + if not os.path.exists(path): + os.makedirs(path) + + +def get_symmetry_transformations(model_info, max_sym_disc_step): + """Returns a set of symmetry transformations for an object model. + + :param model_info: See files models_info.json provided with the datasets. + :param max_sym_disc_step: The maximum fraction of the object diameter which + the vertex that is the furthest from the axis of continuous rotational + symmetry travels between consecutive discretized rotations. + :return: The set of symmetry transformations. + """ + # Identity. + trans = [{'R': np.eye(3), 't': np.array([[0, 0, 0]]).T}] + + # Discrete symmetries. + if 'symmetries_discrete' in model_info: + for sym in model_info['symmetries_discrete']: + sym_4x4 = np.reshape(sym, (4, 4)) + R = sym_4x4[:3, :3] + t = sym_4x4[:3, 3].reshape((3, 1)) + trans.append({'R': R, 't': t}) + + # Discretized continuous symmetries. + if 'symmetries_continuous' in model_info: + for sym in model_info['symmetries_continuous']: + axis = np.array(sym['axis']) + offset = np.array(sym['offset']).reshape((3, 1)) + + # (PI * diameter) / (max_sym_disc_step * diameter) = discrete_steps_count + discrete_steps_count = int(np.ceil(np.pi / max_sym_disc_step)) + + # Discrete step in radians. + discrete_step = 2.0 * np.pi / discrete_steps_count + + for i in range(1, discrete_steps_count): + R = transform.rotation_matrix(i * discrete_step, axis)[:3, :3] + t = -R.dot(offset) + offset + trans.append({'R': R, 't': t}) + + return trans + + +def project_pts(pts, K, R, t): + """Projects 3D points. + + :param pts: nx3 ndarray with the 3D points. + :param K: 3x3 ndarray with an intrinsic camera matrix. + :param R: 3x3 ndarray with a rotation matrix. + :param t: 3x1 ndarray with a translation vector. + :return: nx2 ndarray with 2D image coordinates of the projections. 
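+
+  The projection is computed as [u, v, w]^T = K * (R * X + t), followed by
+  the perspective division (u / w, v / w).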
+ """ + assert (pts.shape[1] == 3) + P = K.dot(np.hstack((R, t))) + pts_h = np.hstack((pts, np.ones((pts.shape[0], 1)))) + pts_im = P.dot(pts_h.T) + pts_im /= pts_im[2, :] + return pts_im[:2, :].T + + +class Precomputer(object): + """ Caches pre_Xs, pre_Ys for a 30% speedup of depth_im_to_dist_im() + """ + xs, ys = None, None + pre_Xs,pre_Ys = None,None + depth_im_shape = None + K = None + + @staticmethod + def precompute_lazy(depth_im, K): + """ Lazy precomputation for depth_im_to_dist_im() if depth_im.shape or K changes + + :param depth_im: hxw ndarray with the input depth image, where depth_im[y, x] + is the Z coordinate of the 3D point [X, Y, Z] that projects to pixel [x, y], + or 0 if there is no such 3D point (this is a typical output of the + Kinect-like sensors). + :param K: 3x3 ndarray with an intrinsic camera matrix. + :return: hxw ndarray (Xs/depth_im, Ys/depth_im) + """ + if depth_im.shape != Precomputer.depth_im_shape: + Precomputer.depth_im_shape = depth_im.shape + Precomputer.xs, Precomputer.ys = np.meshgrid( + np.arange(depth_im.shape[1]), np.arange(depth_im.shape[0])) + + if depth_im.shape != Precomputer.depth_im_shape \ + or not np.all(K == Precomputer.K): + Precomputer.K = K + Precomputer.pre_Xs = (Precomputer.xs - K[0, 2]) / np.float64(K[0, 0]) + Precomputer.pre_Ys = (Precomputer.ys - K[1, 2]) / np.float64(K[1, 1]) + + return Precomputer.pre_Xs, Precomputer.pre_Ys + + +def depth_im_to_dist_im_fast(depth_im, K): + """Converts a depth image to a distance image. + + :param depth_im: hxw ndarray with the input depth image, where depth_im[y, x] + is the Z coordinate of the 3D point [X, Y, Z] that projects to pixel [x, y], + or 0 if there is no such 3D point (this is a typical output of the + Kinect-like sensors). + :param K: 3x3 ndarray with an intrinsic camera matrix. + :return: hxw ndarray with the distance image, where dist_im[y, x] is the + distance from the camera center to the 3D point [X, Y, Z] that projects to + pixel [x, y], or 0 if there is no such 3D point. + """ + # Only recomputed if depth_im.shape or K changes. + pre_Xs, pre_Ys = Precomputer.precompute_lazy(depth_im, K) + + dist_im = np.sqrt( + np.multiply(pre_Xs, depth_im)**2 + + np.multiply(pre_Ys, depth_im)**2 + + depth_im.astype(np.float64)**2) + + return dist_im + + +def depth_im_to_dist_im(depth_im, K): + """Converts a depth image to a distance image. + :param depth_im: hxw ndarray with the input depth image, where depth_im[y, x] + is the Z coordinate of the 3D point [X, Y, Z] that projects to pixel [x, y], + or 0 if there is no such 3D point (this is a typical output of the + Kinect-like sensors). + :param K: 3x3 ndarray with an intrinsic camera matrix. + :return: hxw ndarray with the distance image, where dist_im[y, x] is the + distance from the camera center to the 3D point [X, Y, Z] that projects to + pixel [x, y], or 0 if there is no such 3D point. + """ + xs, ys = np.meshgrid( + np.arange(depth_im.shape[1]), np.arange(depth_im.shape[0])) + + Xs = np.multiply(xs - K[0, 2], depth_im) * (1.0 / K[0, 0]) + Ys = np.multiply(ys - K[1, 2], depth_im) * (1.0 / K[1, 1]) + + dist_im = np.sqrt( + Xs.astype(np.float64)**2 + + Ys.astype(np.float64)**2 + + depth_im.astype(np.float64)**2) + # dist_im = np.linalg.norm(np.dstack((Xs, Ys, depth_im)), axis=2) # Slower. + + return dist_im + +def clip_pt_to_im(pt, im_size): + """Clips a 2D point to the image frame. + + :param pt: 2D point (x, y). + :param im_size: Image size (width, height). + :return: Clipped 2D point (x, y). 
+ """ + return [min(max(pt[0], 0), im_size[0] - 1), + min(max(pt[1], 0), im_size[1] - 1)] + + +def calc_2d_bbox(xs, ys, im_size=None, clip=False): + """Calculates 2D bounding box of the given set of 2D points. + + :param xs: 1D ndarray with x-coordinates of 2D points. + :param ys: 1D ndarray with y-coordinates of 2D points. + :param im_size: Image size (width, height) (used for optional clipping). + :param clip: Whether to clip the bounding box (default == False). + :return: 2D bounding box (x, y, w, h), where (x, y) is the top-left corner + and (w, h) is width and height of the bounding box. + """ + bb_min = [xs.min(), ys.min()] + bb_max = [xs.max(), ys.max()] + if clip: + assert (im_size is not None) + bb_min = clip_pt_to_im(bb_min, im_size) + bb_max = clip_pt_to_im(bb_max, im_size) + return [bb_min[0], bb_min[1], bb_max[0] - bb_min[0], bb_max[1] - bb_min[1]] + + +def calc_3d_bbox(xs, ys, zs): + """Calculates 3D bounding box of the given set of 3D points. + + :param xs: 1D ndarray with x-coordinates of 3D points. + :param ys: 1D ndarray with y-coordinates of 3D points. + :param zs: 1D ndarray with z-coordinates of 3D points. + :return: 3D bounding box (x, y, z, w, h, d), where (x, y, z) is the top-left + corner and (w, h, d) is width, height and depth of the bounding box. + """ + bb_min = [xs.min(), ys.min(), zs.min()] + bb_max = [xs.max(), ys.max(), zs.max()] + return [bb_min[0], bb_min[1], bb_min[2], + bb_max[0] - bb_min[0], bb_max[1] - bb_min[1], bb_max[2] - bb_min[2]] + + +def iou(bb_a, bb_b): + """Calculates the Intersection over Union (IoU) of two 2D bounding boxes. + + :param bb_a: 2D bounding box (x1, y1, w1, h1) -- see calc_2d_bbox. + :param bb_b: 2D bounding box (x2, y2, w2, h2) -- see calc_2d_bbox. + :return: The IoU value. + """ + # [x1, y1, width, height] --> [x1, y1, x2, y2] + tl_a, br_a = (bb_a[0], bb_a[1]), (bb_a[0] + bb_a[2], bb_a[1] + bb_a[3]) + tl_b, br_b = (bb_b[0], bb_b[1]), (bb_b[0] + bb_b[2], bb_b[1] + bb_b[3]) + + # Intersection rectangle. + tl_inter = max(tl_a[0], tl_b[0]), max(tl_a[1], tl_b[1]) + br_inter = min(br_a[0], br_b[0]), min(br_a[1], br_b[1]) + + # Width and height of the intersection rectangle. + w_inter = br_inter[0] - tl_inter[0] + h_inter = br_inter[1] - tl_inter[1] + + if w_inter > 0 and h_inter > 0: + area_inter = w_inter * h_inter + area_a = bb_a[2] * bb_a[3] + area_b = bb_b[2] * bb_b[3] + iou = area_inter / float(area_a + area_b - area_inter) + else: + iou = 0.0 + + return iou + + +def transform_pts_Rt(pts, R, t): + """Applies a rigid transformation to 3D points. + + :param pts: nx3 ndarray with 3D points. + :param R: 3x3 ndarray with a rotation matrix. + :param t: 3x1 ndarray with a translation vector. + :return: nx3 ndarray with transformed 3D points. + """ + assert (pts.shape[1] == 3) + pts_t = R.dot(pts.T) + t.reshape((3, 1)) + return pts_t.T + + +def calc_pts_diameter(pts): + """Calculates the diameter of a set of 3D points (i.e. the maximum distance + between any two points in the set). + + :param pts: nx3 ndarray with 3D points. + :return: The calculated diameter. + """ + diameter = -1.0 + for pt_id in range(pts.shape[0]): + pt_dup = np.tile(np.array([pts[pt_id, :]]), [pts.shape[0] - pt_id, 1]) + pts_diff = pt_dup - pts[pt_id:, :] + max_dist = math.sqrt((pts_diff * pts_diff).sum(axis=1).max()) + if max_dist > diameter: + diameter = max_dist + return diameter + + +def calc_pts_diameter2(pts): + """Calculates the diameter of a set of 3D points (i.e. the maximum distance + between any two points in the set). 
Faster but requires more memory than + calc_pts_diameter. + + :param pts: nx3 ndarray with 3D points. + :return: The calculated diameter. + """ + dists = distance.cdist(pts, pts, 'euclidean') + diameter = np.max(dists) + return diameter + + +def overlapping_sphere_projections(radius, p1, p2): + """Checks if projections of two spheres overlap (approximated). + + :param radius: Radius of the two spheres. + :param p1: [X1, Y1, Z1] center of the first sphere. + :param p2: [X2, Y2, Z2] center of the second sphere. + :return: True if the projections of the two spheres overlap. + """ + # 2D projections of centers of the spheres. + proj1 = (p1 / p1[2])[:2] + proj2 = (p2 / p2[2])[:2] + + # Distance between the center projections. + proj_dist = np.linalg.norm(proj1 - proj2) + + # The max. distance of the center projections at which the sphere projections, + # i.e. sphere silhouettes, still overlap (approximated). + proj_dist_thresh = radius * (1.0 / p1[2] + 1.0 / p2[2]) + + return proj_dist < proj_dist_thresh + + +def get_error_signature(error_type, n_top, **kwargs): + """Generates a signature for the specified settings of pose error calculation. + + :param error_type: Type of error. + :param n_top: Top N pose estimates (with the highest score) to be evaluated + for each object class in each image. + :return: Generated signature. + """ + error_sign = 'error=' + error_type + '_ntop=' + str(n_top) + if error_type == 'vsd': + if kwargs['vsd_tau'] == float('inf'): + vsd_tau_str = 'inf' + else: + vsd_tau_str = '{:.3f}'.format(kwargs['vsd_tau']) + error_sign += '_delta={:.3f}_tau={}'.format( + kwargs['vsd_delta'], vsd_tau_str) + return error_sign + + +def get_score_signature(correct_th, visib_gt_min): + """Generates a signature for a performance score. + + :param visib_gt_min: Minimum visible surface fraction of a valid GT pose. + :return: Generated signature. + """ + eval_sign = 'th=' + '-'.join(['{:.3f}'.format(t) for t in correct_th]) + eval_sign += '_min-visib={:.3f}'.format(visib_gt_min) + return eval_sign + + +def run_meshlab_script(meshlab_server_path, meshlab_script_path, model_in_path, + model_out_path, attrs_to_save): + """Runs a MeshLab script on a 3D model. + + meshlabserver depends on X server. To remove this dependence (on linux), run: + 1) Xvfb :100 & + 2) export DISPLAY=:100.0 + 3) meshlabserver + + :param meshlab_server_path: Path to meshlabserver.exe. + :param meshlab_script_path: Path to an MLX MeshLab script. + :param model_in_path: Path to the input 3D model saved in the PLY format. + :param model_out_path: Path to the output 3D model saved in the PLY format. 
+ :param attrs_to_save: Attributes to save: + - vc -> vertex colors + - vf -> vertex flags + - vq -> vertex quality + - vn -> vertex normals + - vt -> vertex texture coords + - fc -> face colors + - ff -> face flags + - fq -> face quality + - fn -> face normals + - wc -> wedge colors + - wn -> wedge normals + - wt -> wedge texture coords + """ + meshlabserver_cmd = [meshlab_server_path, '-s', meshlab_script_path, '-i', + model_in_path, '-o', model_out_path] + + if len(attrs_to_save): + meshlabserver_cmd += ['-m'] + attrs_to_save + + log(' '.join(meshlabserver_cmd)) + if subprocess.call(meshlabserver_cmd) != 0: + exit(-1) diff --git a/bop_toolkit_lib/pose_error.py b/bop_toolkit_lib/pose_error.py new file mode 100644 index 0000000..a4463eb --- /dev/null +++ b/bop_toolkit_lib/pose_error.py @@ -0,0 +1,330 @@ +# Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) +# Center for Machine Perception, Czech Technical University in Prague + +"""Implementation of the pose error functions described in: +Hodan, Michel et al., "BOP: Benchmark for 6D Object Pose Estimation", ECCV'18 +Hodan et al., "On Evaluation of 6D Object Pose Estimation", ECCVW'16 +""" + +import math +import numpy as np +from scipy import spatial + +from bop_toolkit_lib import misc +from bop_toolkit_lib import visibility + + +def vsd(R_est, t_est, R_gt, t_gt, depth_test, K, delta, taus, + normalized_by_diameter, diameter, renderer, obj_id, cost_type='step'): + """Visible Surface Discrepancy -- by Hodan, Michel et al. (ECCV 2018). + + :param R_est: 3x3 ndarray with the estimated rotation matrix. + :param t_est: 3x1 ndarray with the estimated translation vector. + :param R_gt: 3x3 ndarray with the ground-truth rotation matrix. + :param t_gt: 3x1 ndarray with the ground-truth translation vector. + :param depth_test: hxw ndarray with the test depth image. + :param K: 3x3 ndarray with an intrinsic camera matrix. + :param delta: Tolerance used for estimation of the visibility masks. + :param taus: A list of misalignment tolerance values. + :param normalized_by_diameter: Whether to normalize the pixel-wise distances + by the object diameter. + :param diameter: Object diameter. + :param renderer: Instance of the Renderer class (see renderer.py). + :param obj_id: Object identifier. + :param cost_type: Type of the pixel-wise matching cost: + 'tlinear' - Used in the original definition of VSD in: + Hodan et al., On Evaluation of 6D Object Pose Estimation, ECCVW'16 + 'step' - Used for SIXD Challenge 2017 onwards. + :return: List of calculated errors (one for each misalignment tolerance). + """ + # Render depth images of the model in the estimated and the ground-truth pose. + fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2] + depth_est = renderer.render_object( + obj_id, R_est, t_est, fx, fy, cx, cy)['depth'] + depth_gt = renderer.render_object( + obj_id, R_gt, t_gt, fx, fy, cx, cy)['depth'] + + # Convert depth images to distance images. + dist_test = misc.depth_im_to_dist_im_fast(depth_test, K) + dist_gt = misc.depth_im_to_dist_im_fast(depth_gt, K) + dist_est = misc.depth_im_to_dist_im_fast(depth_est, K) + + # Visibility mask of the model in the ground-truth pose. + visib_gt = visibility.estimate_visib_mask_gt( + dist_test, dist_gt, delta, visib_mode='bop19') + + # Visibility mask of the model in the estimated pose. + visib_est = visibility.estimate_visib_mask_est( + dist_test, dist_est, visib_gt, delta, visib_mode='bop19') + + # Intersection and union of the visibility masks. 
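+  # Pixels that are visible for only one of the two poses (i.e. in the union
+  # but not in the intersection) are later counted with the maximum cost of 1.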
+ visib_inter = np.logical_and(visib_gt, visib_est) + visib_union = np.logical_or(visib_gt, visib_est) + + visib_union_count = visib_union.sum() + visib_comp_count = visib_union_count - visib_inter.sum() + + # Pixel-wise distances. + dists = np.abs(dist_gt[visib_inter] - dist_est[visib_inter]) + + # Normalization of pixel-wise distances by object diameter. + if normalized_by_diameter: + dists /= diameter + + # Calculate VSD for each provided value of the misalignment tolerance. + if visib_union_count == 0: + errors = [1.0] * len(taus) + else: + errors = [] + for tau in taus: + + # Pixel-wise matching cost. + if cost_type == 'step': + costs = dists >= tau + elif cost_type == 'tlinear': # Truncated linear function. + costs = dists / tau + costs[costs > 1.0] = 1.0 + else: + raise ValueError('Unknown pixel matching cost.') + + e = (np.sum(costs) + visib_comp_count) / float(visib_union_count) + errors.append(e) + + return errors + + +def mssd(R_est, t_est, R_gt, t_gt, pts, syms): + """Maximum Symmetry-Aware Surface Distance (MSSD). + + See: http://bop.felk.cvut.cz/challenges/bop-challenge-2019/ + + :param R_est: 3x3 ndarray with the estimated rotation matrix. + :param t_est: 3x1 ndarray with the estimated translation vector. + :param R_gt: 3x3 ndarray with the ground-truth rotation matrix. + :param t_gt: 3x1 ndarray with the ground-truth translation vector. + :param pts: nx3 ndarray with 3D model points. + :param syms: Set of symmetry transformations, each given by a dictionary with: + - 'R': 3x3 ndarray with the rotation matrix. + - 't': 3x1 ndarray with the translation vector. + :return: The calculated error. + """ + pts_est = misc.transform_pts_Rt(pts, R_est, t_est) + es = [] + for sym in syms: + R_gt_sym = R_gt.dot(sym['R']) + t_gt_sym = R_gt.dot(sym['t']) + t_gt + pts_gt_sym = misc.transform_pts_Rt(pts, R_gt_sym, t_gt_sym) + es.append(np.linalg.norm(pts_est - pts_gt_sym, axis=1).max()) + return min(es) + + +def mspd(R_est, t_est, R_gt, t_gt, K, pts, syms): + """Maximum Symmetry-Aware Projection Distance (MSPD). + + See: http://bop.felk.cvut.cz/challenges/bop-challenge-2019/ + + :param R_est: 3x3 ndarray with the estimated rotation matrix. + :param t_est: 3x1 ndarray with the estimated translation vector. + :param R_gt: 3x3 ndarray with the ground-truth rotation matrix. + :param t_gt: 3x1 ndarray with the ground-truth translation vector. + :param K: 3x3 ndarray with the intrinsic camera matrix. + :param pts: nx3 ndarray with 3D model points. + :param syms: Set of symmetry transformations, each given by a dictionary with: + - 'R': 3x3 ndarray with the rotation matrix. + - 't': 3x1 ndarray with the translation vector. + :return: The calculated error. + """ + proj_est = misc.project_pts(pts, K, R_est, t_est) + es = [] + for sym in syms: + R_gt_sym = R_gt.dot(sym['R']) + t_gt_sym = R_gt.dot(sym['t']) + t_gt + proj_gt_sym = misc.project_pts(pts, K, R_gt_sym, t_gt_sym) + es.append(np.linalg.norm(proj_est - proj_gt_sym, axis=1).max()) + return min(es) + + +def add(R_est, t_est, R_gt, t_gt, pts): + """Average Distance of Model Points for objects with no indistinguishable + views - by Hinterstoisser et al. (ACCV'12). + + :param R_est: 3x3 ndarray with the estimated rotation matrix. + :param t_est: 3x1 ndarray with the estimated translation vector. + :param R_gt: 3x3 ndarray with the ground-truth rotation matrix. + :param t_gt: 3x1 ndarray with the ground-truth translation vector. + :param pts: nx3 ndarray with 3D model points. + :return: The calculated error. 
+ """ + pts_est = misc.transform_pts_Rt(pts, R_est, t_est) + pts_gt = misc.transform_pts_Rt(pts, R_gt, t_gt) + e = np.linalg.norm(pts_est - pts_gt, axis=1).mean() + return e + + +def adi(R_est, t_est, R_gt, t_gt, pts): + """Average Distance of Model Points for objects with indistinguishable views + - by Hinterstoisser et al. (ACCV'12). + + :param R_est: 3x3 ndarray with the estimated rotation matrix. + :param t_est: 3x1 ndarray with the estimated translation vector. + :param R_gt: 3x3 ndarray with the ground-truth rotation matrix. + :param t_gt: 3x1 ndarray with the ground-truth translation vector. + :param pts: nx3 ndarray with 3D model points. + :return: The calculated error. + """ + pts_est = misc.transform_pts_Rt(pts, R_est, t_est) + pts_gt = misc.transform_pts_Rt(pts, R_gt, t_gt) + + # Calculate distances to the nearest neighbors from vertices in the + # ground-truth pose to vertices in the estimated pose. + nn_index = spatial.cKDTree(pts_est) + nn_dists, _ = nn_index.query(pts_gt, k=1) + + e = nn_dists.mean() + return e + + +def re(R_est, R_gt): + """Rotational Error. + + :param R_est: 3x3 ndarray with the estimated rotation matrix. + :param R_gt: 3x3 ndarray with the ground-truth rotation matrix. + :return: The calculated error. + """ + assert (R_est.shape == R_gt.shape == (3, 3)) + error_cos = float(0.5 * (np.trace(R_est.dot(np.linalg.inv(R_gt))) - 1.0)) + + # Avoid invalid values due to numerical errors. + error_cos = min(1.0, max(-1.0, error_cos)) + + error = math.acos(error_cos) + error = 180.0 * error / np.pi # Convert [rad] to [deg]. + return error + + +def te(t_est, t_gt): + """Translational Error. + + :param t_est: 3x1 ndarray with the estimated translation vector. + :param t_gt: 3x1 ndarray with the ground-truth translation vector. + :return: The calculated error. + """ + assert (t_est.size == t_gt.size == 3) + error = np.linalg.norm(t_gt - t_est) + return error + + +def proj(R_est, t_est, R_gt, t_gt, K, pts): + """Average distance of projections of object model vertices [px] + - by Brachmann et al. (CVPR'16). + + :param R_est: 3x3 ndarray with the estimated rotation matrix. + :param t_est: 3x1 ndarray with the estimated translation vector. + :param R_gt: 3x3 ndarray with the ground-truth rotation matrix. + :param t_gt: 3x1 ndarray with the ground-truth translation vector. + :param K: 3x3 ndarray with an intrinsic camera matrix. + :param pts: nx3 ndarray with 3D model points. + :return: The calculated error. + """ + proj_est = misc.project_pts(pts, K, R_est, t_est) + proj_gt = misc.project_pts(pts, K, R_gt, t_gt) + e = np.linalg.norm(proj_est - proj_gt, axis=1).mean() + return e + + +def cou_mask(mask_est, mask_gt): + """Complement over Union of 2D binary masks. + + :param mask_est: hxw ndarray with the estimated mask. + :param mask_gt: hxw ndarray with the ground-truth mask. + :return: The calculated error. + """ + mask_est_bool = mask_est.astype(np.bool) + mask_gt_bool = mask_gt.astype(np.bool) + + inter = np.logical_and(mask_gt_bool, mask_est_bool) + union = np.logical_or(mask_gt_bool, mask_est_bool) + + union_count = float(union.sum()) + if union_count > 0: + e = 1.0 - inter.sum() / union_count + else: + e = 1.0 + return e + + +def cus(R_est, t_est, R_gt, t_gt, K, renderer, obj_id): + """Complement over Union of projected 2D masks. + + :param R_est: 3x3 ndarray with the estimated rotation matrix. + :param t_est: 3x1 ndarray with the estimated translation vector. + :param R_gt: 3x3 ndarray with the ground-truth rotation matrix. 
+ :param t_gt: 3x1 ndarray with the ground-truth translation vector. + :param K: 3x3 ndarray with an intrinsic camera matrix. + :param renderer: Instance of the Renderer class (see renderer.py). + :param obj_id: Object identifier. + :return: The calculated error. + """ + # Render depth images of the model at the estimated and the ground-truth pose. + fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2] + depth_est = renderer.render_object( + obj_id, R_est, t_est, fx, fy, cx, cy)['depth'] + depth_gt = renderer.render_object( + obj_id, R_gt, t_gt, fx, fy, cx, cy)['depth'] + + # Masks of the rendered model and their intersection and union. + mask_est = depth_est > 0 + mask_gt = depth_gt > 0 + inter = np.logical_and(mask_gt, mask_est) + union = np.logical_or(mask_gt, mask_est) + + union_count = float(union.sum()) + if union_count > 0: + e = 1.0 - inter.sum() / union_count + else: + e = 1.0 + return e + + +def cou_bb(bb_est, bb_gt): + """Complement over Union of 2D bounding boxes. + + :param bb_est: The estimated bounding box (x1, y1, w1, h1). + :param bb_gt: The ground-truth bounding box (x2, y2, w2, h2). + :return: The calculated error. + """ + e = 1.0 - misc.iou(bb_est, bb_gt) + return e + + +def cou_bb_proj(R_est, t_est, R_gt, t_gt, K, renderer, obj_id): + """Complement over Union of projected 2D bounding boxes. + + :param R_est: 3x3 ndarray with the estimated rotation matrix. + :param t_est: 3x1 ndarray with the estimated translation vector. + :param R_gt: 3x3 ndarray with the ground-truth rotation matrix. + :param t_gt: 3x1 ndarray with the ground-truth translation vector. + :param K: 3x3 ndarray with an intrinsic camera matrix. + :param renderer: Instance of the Renderer class (see renderer.py). + :param obj_id: Object identifier. + :return: The calculated error. + """ + # Render depth images of the model at the estimated and the ground-truth pose. + fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2] + depth_est = renderer.render_object( + obj_id, R_est, t_est, fx, fy, cx, cy)['depth'] + depth_gt = renderer.render_object( + obj_id, R_gt, t_gt, fx, fy, cx, cy)['depth'] + + # Masks of the rendered model and their intersection and union + mask_est = depth_est > 0 + mask_gt = depth_gt > 0 + + ys_est, xs_est = mask_est.nonzero() + bb_est = misc.calc_2d_bbox(xs_est, ys_est, im_size=None, clip=False) + + ys_gt, xs_gt = mask_gt.nonzero() + bb_gt = misc.calc_2d_bbox(xs_gt, ys_gt, im_size=None, clip=False) + + e = 1.0 - misc.iou(bb_est, bb_gt) + return e diff --git a/bop_toolkit_lib/pose_matching.py b/bop_toolkit_lib/pose_matching.py new file mode 100644 index 0000000..a7ea5a0 --- /dev/null +++ b/bop_toolkit_lib/pose_matching.py @@ -0,0 +1,160 @@ +# Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) +# Center for Machine Perception, Czech Technical University in Prague + +"""Matching of estimated poses to the ground-truth poses.""" + +import numpy as np + + +def match_poses(errs, error_ths, max_ests_count=0, gt_valid_mask=None): + """Matches the estimated poses to the ground-truth poses. + + The estimated poses are greedily matched to the ground truth poses in the + order of decreasing score of the estimates. An estimated pose is matched to a + ground-truth pose if the error w.r.t. the ground-truth pose is below the + specified threshold. Each estimated pose is matched to up to one ground-truth + pose and each ground-truth pose is matched to up to one estimated pose. 
+ + :param errs: List of dictionaries, where each dictionary holds the following + info about one pose estimate: + - 'est_id': ID of the pose estimate. + - 'score': Confidence score of the pose estimate. + - 'errors': Dictionary mapping ground-truth ID's to errors of the pose + estimate w.r.t. the ground-truth poses. + :param error_ths: Thresholds of correctness. The pose error can be given + by more than one element (e.g. translational + rotational error), in which + case there is one threshold for each element. + :param max_ests_count: Top k pose estimates to consider (0 = all). + :param gt_valid_mask: Mask of ground-truth poses which can be considered. + :return: List of dictionaries, where each dictionary holds info for one pose + estimate (the estimates are ordered as in errs) about the matching + ground-truth pose: + - 'est_id': ID of the pose estimate. + - 'gt_id': ID of the matched ground-truth pose (-1 means there is no + matching ground-truth pose). + - 'score': Confidence score of the pose estimate. + - 'error': Error of the pose estimate w.r.t. the matched ground-truth pose. + - 'error_norm': Error normalized by the threshold value. + """ + # Sort the estimated poses by decreasing confidence score. + errs_sorted = sorted(errs, key=lambda e: e['score'], reverse=True) + + # Keep only the required number of poses with the highest confidence score. + # 0 = all pose estimates are considered. + if max_ests_count > 0: + errs_sorted = errs_sorted[:max_ests_count] + + # Number of values defining the error (e.g. 1 for "ADD", 2 for "5deg 5cm"). + error_num_elems = len(error_ths) + + # Greedily match the estimated poses to the ground truth poses in the order of + # decreasing score of the estimates. + matches = [] + gt_matched = [] + for e in errs_sorted: + + best_gt_id = -1 + best_error = list(error_ths) + for gt_id, error in e['errors'].items(): + + # If the mask of valid GT poses is not provided, consider all valid. + is_valid = not gt_valid_mask or gt_valid_mask[gt_id] + + # Only valid GT poses that have not been matched yet are considered. + if is_valid and gt_id not in gt_matched: + + # The current pose estimate is considered the best so far if all error + # elements are the lowest so far. + if np.all([error[i] < best_error[i] for i in range(error_num_elems)]): + best_gt_id = gt_id + best_error = error + + if best_gt_id >= 0: + + # Mark the GT pose as matched. + gt_matched.append(best_gt_id) + + # Error normalized by the threshold. + best_errors_normed = [best_error[i] / float(error_ths[i]) + for i in range(error_num_elems)] + + # Save info about the match. + matches.append({ + 'est_id': e['est_id'], + 'gt_id': best_gt_id, + 'score': e['score'], + 'error': best_error, + 'error_norm': best_errors_normed + }) + + return matches + + +def match_poses_scene(scene_id, scene_gt, scene_gt_valid, scene_errs, + correct_th, n_top): + """Matches the estimated poses to the ground-truth poses in one scene. + + :param scene_id: Scene ID. + :param scene_gt: Dictionary mapping image ID's to lists of dictionaries with: + - 'obj_id': Object ID of the ground-truth pose. + :param scene_gt_valid: Dictionary mapping image ID's to lists of boolean + values indicating which ground-truth poses should be considered. + :param scene_errs: List of dictionaries with: + - 'im_id': Image ID. + - 'obj_id': Object ID. + - 'est_id': ID of the pose estimate. + - 'score': Confidence score of the pose estimate. + - 'errors': Dictionary mapping ground-truth ID's to errors of the pose + estimate w.r.t. 
the ground-truth poses. + :param error_obj_threshs: Dictionary mapping object ID's to values of the + threshold of correctness. + :param n_top: Top N pose estimates (with the highest score) to be evaluated + for each object class in each image. + :return: + """ + # Organize the errors by image ID and object ID (for faster query). + scene_errs_org = {} + for e in scene_errs: + scene_errs_org.setdefault( + e['im_id'], {}).setdefault(e['obj_id'], []).append(e) + + # Matching of poses in individual images. + scene_matches = [] + for im_id, im_gts in scene_gt.items(): + im_matches = [] + + for gt_id, gt in enumerate(im_gts): + im_matches.append({ + 'scene_id': scene_id, + 'im_id': im_id, + 'obj_id': gt['obj_id'], + 'gt_id': gt_id, + 'est_id': -1, + 'score': -1, + 'error': -1, + 'error_norm': -1, + 'valid': scene_gt_valid[im_id][gt_id], + }) + + # Treat estimates of each object separately. + im_obj_ids = set([gt['obj_id'] for gt in im_gts]) + for obj_id in im_obj_ids: + if im_id in scene_errs_org.keys()\ + and obj_id in scene_errs_org[im_id].keys(): + + # Greedily match the estimated poses to the ground truth poses. + errs_im_obj = scene_errs_org[im_id][obj_id] + ms = match_poses( + errs_im_obj, correct_th, n_top, scene_gt_valid[im_id]) + + # Update info about the matched GT poses. + for m in ms: + g = im_matches[m['gt_id']] + g['est_id'] = m['est_id'] + g['score'] = m['score'] + g['error'] = m['error'] + g['error_norm'] = m['error_norm'] + + scene_matches += im_matches + + return scene_matches diff --git a/bop_toolkit_lib/renderer.py b/bop_toolkit_lib/renderer.py new file mode 100644 index 0000000..a96649a --- /dev/null +++ b/bop_toolkit_lib/renderer.py @@ -0,0 +1,97 @@ +# Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) +# Center for Machine Perception, Czech Technical University in Prague + +"""Abstract class of a renderer and a factory function to create a renderer. + +The renderer produces an RGB/depth image of a 3D mesh model in a specified pose +for given camera parameters and illumination settings. +""" + + +class Renderer(object): + """Abstract class of a renderer.""" + + def __init__(self, width, height): + """Constructor. + + :param width: Width of the rendered image. + :param height: Height of the rendered image. + """ + self.width = width + self.height = height + + # 3D location of a point light (in the camera coordinates). + self.light_cam_pos = (0, 0, 0) + + # Weight of the ambient light. + self.light_ambient_weight = 0.5 + + def set_light_cam_pos(self, light_cam_pos): + """Sets the 3D location of a point light. + + :param light_cam_pos: [X, Y, Z]. + """ + self.light_cam_pos = light_cam_pos + + def set_light_ambient_weight(self, light_ambient_weight): + """Sets weight of the ambient light. + + :param light_ambient_weight: Scalar from 0 to 1. + """ + self.light_ambient_weight = light_ambient_weight + + def add_object(self, obj_id, model_path, **kwargs): + """Loads an object model. + + :param obj_id: Object identifier. + :param model_path: Path to the object model file. + """ + raise NotImplementedError + + def remove_object(self, obj_id): + """Removes an object model. + + :param obj_id: Identifier of the object to remove. + """ + raise NotImplementedError + + def render_object(self, obj_id, R, t, fx, fy, cx, cy): + """Renders an object model in the specified pose. + + :param obj_id: Object identifier. + :param R: 3x3 ndarray with a rotation matrix. + :param t: 3x1 ndarray with a translation vector. + :param fx: Focal length (X axis). + :param fy: Focal length (Y axis). 
+ :param cx: The X coordinate of the principal point. + :param cy: The Y coordinate of the principal point. + :return: Returns a dictionary with rendered images. + """ + raise NotImplementedError + + +def create_renderer(width, height, renderer_type='cpp', mode='rgb+depth', + shading='phong', bg_color=(0.0, 0.0, 0.0, 0.0)): + """A factory to create a renderer. + + Note: Parameters mode, shading and bg_color are currently supported only by + the Python renderer (renderer_type='python'). + + :param width: Width of the rendered image. + :param height: Height of the rendered image. + :param renderer_type: Type of renderer (options: 'cpp', 'python'). + :param mode: Rendering mode ('rgb+depth', 'rgb', 'depth'). + :param shading: Type of shading ('flat', 'phong'). + :param bg_color: Color of the background (R, G, B, A). + :return: Instance of a renderer of the specified type. + """ + if renderer_type == 'python': + from . import renderer_py + return renderer_py.RendererPython(width, height, mode, shading, bg_color) + + elif renderer_type == 'cpp': + from . import renderer_cpp + return renderer_cpp.RendererCpp(width, height) + + else: + raise ValueError('Unknown renderer type.') diff --git a/bop_toolkit_lib/renderer_cpp.py b/bop_toolkit_lib/renderer_cpp.py new file mode 100644 index 0000000..09a557e --- /dev/null +++ b/bop_toolkit_lib/renderer_cpp.py @@ -0,0 +1,50 @@ +# Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) +# Center for Machine Perception, Czech Technical University in Prague + +"""An interface to the C++ based renderer (bop_renderer).""" + +import sys + +from bop_toolkit_lib import config +from bop_toolkit_lib import renderer + +# C++ renderer (https://github.com/thodan/bop_renderer) +sys.path.append(config.bop_renderer_path) +import bop_renderer + + +class RendererCpp(renderer.Renderer): + """An interface to the C++ based renderer.""" + + def __init__(self, width, height): + """See base class.""" + super(RendererCpp, self).__init__(width, height) + self.renderer = bop_renderer.Renderer() + self.renderer.init(width, height) + + def set_light_cam_pos(self, light_cam_pos): + """See base class.""" + super(RendererCpp, self).set_light_cam_pos(light_cam_pos) + self.renderer.set_light_cam_pos(light_cam_pos) + + def set_light_ambient_weight(self, light_ambient_weight): + """See base class.""" + super(RendererCpp, self).set_light_ambient_weight(light_ambient_weight) + self.renderer.set_light_ambient_weight(light_ambient_weight) + + def add_object(self, obj_id, model_path, **kwargs): + """See base class.""" + self.renderer.add_object(obj_id, model_path) + + def remove_object(self, obj_id): + """See base class.""" + self.renderer.remove_object(obj_id) + + def render_object(self, obj_id, R, t, fx, fy, cx, cy): + """See base class.""" + R_l = map(float, R.flatten().tolist()) + t_l = map(float, t.flatten().tolist()) + self.renderer.render_object(obj_id, R_l, t_l, fx, fy, cx, cy) + rgb = self.renderer.get_color_image(obj_id) + depth = self.renderer.get_depth_image(obj_id) + return {'rgb': rgb, 'depth': depth} diff --git a/bop_toolkit_lib/renderer_py.py b/bop_toolkit_lib/renderer_py.py new file mode 100644 index 0000000..59e0d28 --- /dev/null +++ b/bop_toolkit_lib/renderer_py.py @@ -0,0 +1,555 @@ +# Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) +# Center for Machine Perception, Czech Technical University in Prague + +"""A Python based renderer.""" + +import os +import numpy as np +from glumpy import app, gloo, gl + +from bop_toolkit_lib import inout +from bop_toolkit_lib import misc +from 
bop_toolkit_lib import renderer + +# Set glumpy logging level. +from glumpy.log import log +import logging +log.setLevel(logging.WARNING) # Options: ERROR, WARNING, DEBUG, INFO. + +# Set backend (http://glumpy.readthedocs.io/en/latest/api/app-backends.html). +# app.use('glfw') # Options: 'glfw', 'qt5', 'pyside', 'pyglet'. + + +# RGB vertex shader. +_rgb_vertex_code = """ +uniform mat4 u_mv; +uniform mat4 u_nm; +uniform mat4 u_mvp; +uniform vec3 u_light_eye_pos; + +attribute vec3 a_position; +attribute vec3 a_normal; +attribute vec3 a_color; +attribute vec2 a_texcoord; + +varying vec3 v_color; +varying vec2 v_texcoord; +varying vec3 v_eye_pos; +varying vec3 v_L; +varying vec3 v_normal; + +void main() { + gl_Position = u_mvp * vec4(a_position, 1.0); + v_color = a_color; + v_texcoord = a_texcoord; + + // The following points/vectors are expressed in the eye coordinates. + v_eye_pos = (u_mv * vec4(a_position, 1.0)).xyz; // Vertex. + v_L = normalize(u_light_eye_pos - v_eye_pos); // Vector to the light. + v_normal = normalize(u_nm * vec4(a_normal, 1.0)).xyz; // Normal vector. +} +""" + +# RGB fragment shader - flat shading. +_rgb_fragment_flat_code = """ +uniform float u_light_ambient_w; +uniform sampler2D u_texture; +uniform int u_use_texture; + +varying vec3 v_color; +varying vec2 v_texcoord; +varying vec3 v_eye_pos; +varying vec3 v_L; + +void main() { + // Face normal in eye coords. + vec3 f_normal = normalize(cross(dFdx(v_eye_pos), dFdy(v_eye_pos))); + + float light_diffuse_w = max(dot(normalize(v_L), normalize(f_normal)), 0.0); + float light_w = u_light_ambient_w + light_diffuse_w; + if(light_w > 1.0) light_w = 1.0; + + if(bool(u_use_texture)) { + gl_FragColor = vec4(light_w * texture2D(u_texture, v_texcoord)); + } + else { + gl_FragColor = vec4(light_w * v_color, 1.0); + } +} +""" + +# RGB fragment shader - Phong shading. +_rgb_fragment_phong_code = """ +uniform float u_light_ambient_w; +uniform sampler2D u_texture; +uniform int u_use_texture; + +varying vec3 v_color; +varying vec2 v_texcoord; +varying vec3 v_eye_pos; +varying vec3 v_L; +varying vec3 v_normal; + +void main() { + float light_diffuse_w = max(dot(normalize(v_L), normalize(v_normal)), 0.0); + float light_w = u_light_ambient_w + light_diffuse_w; + if(light_w > 1.0) light_w = 1.0; + + if(bool(u_use_texture)) { + gl_FragColor = vec4(light_w * texture2D(u_texture, v_texcoord)); + } + else { + gl_FragColor = vec4(light_w * v_color, 1.0); + } +} +""" + +# Depth vertex shader. +# Ref: https://github.com/julienr/vertex_visibility/blob/master/depth.py +# +# Getting the depth from the depth buffer in OpenGL is doable, see here: +# http://web.archive.org/web/20130416194336/http://olivers.posterous.com/linear-depth-in-glsl-for-real +# http://web.archive.org/web/20130426093607/http://www.songho.ca/opengl/gl_projectionmatrix.html +# http://stackoverflow.com/a/6657284/116067 +# but it is difficult to achieve high precision, as explained in this article: +# http://dev.theomader.com/depth-precision/ +# +# Once the vertex is in the view coordinates (view * model * v), its depth is +# simply the Z axis. Hence, instead of reading from the depth buffer and undoing +# the projection matrix, we store the Z coord of each vertex in the color +# buffer. OpenGL allows for float32 color buffer components. 
+_depth_vertex_code = """ +uniform mat4 u_mv; +uniform mat4 u_mvp; +attribute vec3 a_position; +attribute vec3 a_color; +varying float v_eye_depth; + +void main() { + gl_Position = u_mvp * vec4(a_position, 1.0); + vec3 v_eye_pos = (u_mv * vec4(a_position, 1.0)).xyz; // In eye coords. + + // OpenGL Z axis goes out of the screen, so depths are negative + v_eye_depth = -v_eye_pos.z; +} +""" + +# Depth fragment shader. +_depth_fragment_code = """ +varying float v_eye_depth; + +void main() { + gl_FragColor = vec4(v_eye_depth, 0.0, 0.0, 1.0); +} +""" + + +# Functions to calculate transformation matrices. +# Note that OpenGL expects the matrices to be saved column-wise. +# (Ref: http://www.songho.ca/opengl/gl_transform.html) + + +def _calc_model_view(model, view): + """Calculates the model-view matrix. + + :param model: 4x4 ndarray with the model matrix. + :param view: 4x4 ndarray with the view matrix. + :return: 4x4 ndarray with the model-view matrix. + """ + return np.dot(model, view) + + +def _calc_model_view_proj(model, view, proj): + """Calculates the model-view-projection matrix. + + :param model: 4x4 ndarray with the model matrix. + :param view: 4x4 ndarray with the view matrix. + :param proj: 4x4 ndarray with the projection matrix. + :return: 4x4 ndarray with the model-view-projection matrix. + """ + return np.dot(np.dot(model, view), proj) + + +def _calc_normal_matrix(model, view): + """Calculates the normal matrix. + + Ref: http://www.songho.ca/opengl/gl_normaltransform.html + + :param model: 4x4 ndarray with the model matrix. + :param view: 4x4 ndarray with the view matrix. + :return: 4x4 ndarray with the normal matrix. + """ + return np.linalg.inv(np.dot(model, view)).T + + +def _calc_calib_proj(K, x0, y0, w, h, nc, fc, window_coords='y_down'): + """Conversion of Hartley-Zisserman intrinsic matrix to OpenGL proj. matrix. + + Ref: + 1) https://strawlab.org/2011/11/05/augmented-reality-with-OpenGL + 2) https://github.com/strawlab/opengl-hz/blob/master/src/calib_test_utils.py + + :param K: 3x3 ndarray with the intrinsic camera matrix. + :param x0 The X coordinate of the camera image origin (typically 0). + :param y0: The Y coordinate of the camera image origin (typically 0). + :param w: Image width. + :param h: Image height. + :param nc: Near clipping plane. + :param fc: Far clipping plane. + :param window_coords: 'y_up' or 'y_down'. + :return: 4x4 ndarray with the OpenGL projection matrix. + """ + depth = float(fc - nc) + q = -(fc + nc) / depth + qn = -2 * (fc * nc) / depth + + # Draw our images upside down, so that all the pixel-based coordinate + # systems are the same. + if window_coords == 'y_up': + proj = np.array([ + [2 * K[0, 0] / w, -2 * K[0, 1] / w, (-2 * K[0, 2] + w + 2 * x0) / w, 0], + [0, -2 * K[1, 1] / h, (-2 * K[1, 2] + h + 2 * y0) / h, 0], + [0, 0, q, qn], # Sets near and far planes (glPerspective). + [0, 0, -1, 0] + ]) + + # Draw the images upright and modify the projection matrix so that OpenGL + # will generate window coords that compensate for the flipped image coords. + else: + assert window_coords == 'y_down' + proj = np.array([ + [2 * K[0, 0] / w, -2 * K[0, 1] / w, (-2 * K[0, 2] + w + 2 * x0) / w, 0], + [0, 2 * K[1, 1] / h, (2 * K[1, 2] - h + 2 * y0) / h, 0], + [0, 0, q, qn], # Sets near and far planes (glPerspective). + [0, 0, -1, 0] + ]) + return proj.T + + +class RendererPython(renderer.Renderer): + """A Python based renderer.""" + + def __init__(self, width, height, mode='rgb+depth', shading='phong', + bg_color=(0.0, 0.0, 0.0, 0.0)): + """Constructor. 
+ + :param width: Width of the rendered image. + :param height: Height of the rendered image. + :param mode: Rendering mode ('rgb+depth', 'rgb', 'depth'). + :param shading: Type of shading ('flat', 'phong'). + :param bg_color: Color of the background (R, G, B, A). + """ + super(RendererPython, self).__init__(width, height) + + self.mode = mode + self.shading = shading + self.bg_color = bg_color + + # Indicators whether to render RGB and/or depth image. + self.render_rgb = self.mode in ['rgb', 'rgb+depth'] + self.render_depth = self.mode in ['depth', 'rgb+depth'] + + # Structures to store object models and related info. + self.models = {} + self.model_bbox_corners = {} + self.model_textures = {} + + # Rendered images. + self.rgb = None + self.depth = None + + # Window for rendering. + self.window = app.Window(visible=False) + + # Per-object vertex and index buffer. + self.vertex_buffers = {} + self.index_buffers = {} + + # Per-object OpenGL programs for rendering of RGB and depth images. + self.rgb_programs = {} + self.depth_programs = {} + + # The frame buffer object. + rgb_buf = np.zeros( + (self.height, self.width, 4), np.float32).view(gloo.TextureFloat2D) + depth_buf = np.zeros( + (self.height, self.width), np.float32).view(gloo.DepthTexture) + self.fbo = gloo.FrameBuffer(color=rgb_buf, depth=depth_buf) + + # Activate the created frame buffer object. + self.fbo.activate() + + def add_object(self, obj_id, model_path, **kwargs): + """See base class.""" + # Color of the object model (the original color saved with the object model + # will be used if None). + surf_color = None + if 'surf_color' in kwargs: + surf_color = kwargs['surf_color'] + + # Load the object model. + model = inout.load_ply(model_path) + self.models[obj_id] = model + + # Calculate the 3D bounding box of the model (will be used to set the near + # and far clipping plane). + bb = misc.calc_3d_bbox( + model['pts'][:, 0], model['pts'][:, 1], model['pts'][:, 2]) + self.model_bbox_corners[obj_id] = np.array([ + [bb[0], bb[1], bb[2]], + [bb[0], bb[1], bb[2] + bb[5]], + [bb[0], bb[1] + bb[4], bb[2]], + [bb[0], bb[1] + bb[4], bb[2] + bb[5]], + [bb[0] + bb[3], bb[1], bb[2]], + [bb[0] + bb[3], bb[1], bb[2] + bb[5]], + [bb[0] + bb[3], bb[1] + bb[4], bb[2]], + [bb[0] + bb[3], bb[1] + bb[4], bb[2] + bb[5]], + ]) + + # Set texture/color of vertices. + self.model_textures[obj_id] = None + + # Use the specified uniform surface color. + if surf_color is not None: + colors = np.tile(list(surf_color) + [1.0], [model['pts'].shape[0], 1]) + + # Set UV texture coordinates to dummy values. + texture_uv = np.zeros((model['pts'].shape[0], 2), np.float32) + + # Use the model texture. + elif 'texture_file' in self.models[obj_id].keys(): + model_texture_path = os.path.join( + os.path.dirname(model_path), self.models[obj_id]['texture_file']) + model_texture = inout.load_im(model_texture_path) + + # Normalize the texture image. + if model_texture.max() > 1.0: + model_texture = model_texture.astype(np.float32) / 255.0 + model_texture = np.flipud(model_texture) + self.model_textures[obj_id] = model_texture + + # UV texture coordinates. + texture_uv = model['texture_uv'] + + # Set the per-vertex color to dummy values. + colors = np.zeros((model['pts'].shape[0], 3), np.float32) + + # Use the original model color. + elif 'colors' in model.keys(): + assert (model['pts'].shape[0] == model['colors'].shape[0]) + colors = model['colors'] + if colors.max() > 1.0: + colors /= 255.0 # Color values are expected in range [0, 1]. 
+ + # Set UV texture coordinates to dummy values. + texture_uv = np.zeros((model['pts'].shape[0], 2), np.float32) + + # Set the model color to gray. + else: + colors = np.ones((model['pts'].shape[0], 3), np.float32) * 0.5 + + # Set UV texture coordinates to dummy values. + texture_uv = np.zeros((model['pts'].shape[0], 2), np.float32) + + # Set the vertex data. + if self.mode == 'depth': + vertices_type = [ + ('a_position', np.float32, 3), + ('a_color', np.float32, colors.shape[1]) + ] + vertices = np.array(list(zip(model['pts'], colors)), vertices_type) + else: + if self.shading == 'flat': + vertices_type = [ + ('a_position', np.float32, 3), + ('a_color', np.float32, colors.shape[1]), + ('a_texcoord', np.float32, 2) + ] + vertices = np.array(list(zip(model['pts'], colors, texture_uv)), + vertices_type) + elif self.shading == 'phong': + vertices_type = [ + ('a_position', np.float32, 3), + ('a_normal', np.float32, 3), + ('a_color', np.float32, colors.shape[1]), + ('a_texcoord', np.float32, 2) + ] + vertices = np.array(list(zip(model['pts'], model['normals'], + colors, texture_uv)), vertices_type) + else: + raise ValueError('Unknown shading type.') + + # Create vertex and index buffer for the loaded object model. + self.vertex_buffers[obj_id] = vertices.view(gloo.VertexBuffer) + self.index_buffers[obj_id] = \ + model['faces'].flatten().astype(np.uint32).view(gloo.IndexBuffer) + + # Set shader for the selected shading. + if self.shading == 'flat': + rgb_fragment_code = _rgb_fragment_flat_code + elif self.shading == 'phong': + rgb_fragment_code = _rgb_fragment_phong_code + else: + raise ValueError('Unknown shading type.') + + # Prepare the RGB OpenGL program. + rgb_program = gloo.Program(_rgb_vertex_code, rgb_fragment_code) + rgb_program.bind(self.vertex_buffers[obj_id]) + if self.model_textures[obj_id] is not None: + rgb_program['u_use_texture'] = int(True) + rgb_program['u_texture'] = self.model_textures[obj_id] + else: + rgb_program['u_use_texture'] = int(False) + rgb_program['u_texture'] = np.zeros((1, 1, 4), np.float32) + self.rgb_programs[obj_id] = rgb_program + + # Prepare the depth OpenGL program. + depth_program = gloo.Program(_depth_vertex_code,_depth_fragment_code) + depth_program.bind(self.vertex_buffers[obj_id]) + self.depth_programs[obj_id] = depth_program + + def remove_object(self, obj_id): + """See base class.""" + del self.models[obj_id] + del self.model_bbox_corners[obj_id] + if obj_id in self.model_textures: + del self.model_textures[obj_id] + del self.vertex_buffers[obj_id] + del self.index_buffers[obj_id] + del self.rgb_programs[obj_id] + del self.depth_programs[obj_id] + + def render_object(self, obj_id, R, t, fx, fy, cx, cy): + """See base class.""" + + # Define the following variables as global so their latest values are always + # seen in function on_draw below. + global curr_obj_id, mat_model, mat_view, mat_proj + curr_obj_id = obj_id + + # Model matrix (from object space to world space). + mat_model = np.eye(4, dtype=np.float32) + + # View matrix (from world space to eye space; transforms also the coordinate + # system from OpenCV to OpenGL camera space). + mat_view_cv = np.eye(4, dtype=np.float32) + mat_view_cv[:3, :3], mat_view_cv[:3, 3] = R, t.squeeze() + yz_flip = np.eye(4, dtype=np.float32) + yz_flip[1, 1], yz_flip[2, 2] = -1, -1 + mat_view = yz_flip.dot(mat_view_cv) # OpenCV to OpenGL camera system. + mat_view = mat_view.T # OpenGL expects column-wise matrix format. + + # Calculate the near and far clipping plane from the 3D bounding box. 
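+    # Fitting the near and far planes tightly to the eye-space extent of the
+    # object's bounding box keeps the OpenGL depth range small, which helps
+    # preserve depth precision.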
+ bbox_corners = self.model_bbox_corners[obj_id] + bbox_corners_ht = np.concatenate( + (bbox_corners, np.ones((bbox_corners.shape[0], 1))), axis=1).transpose() + bbox_corners_eye_z = mat_view_cv[2, :].reshape((1, 4)).dot(bbox_corners_ht) + clip_near = bbox_corners_eye_z.min() + clip_far = bbox_corners_eye_z.max() + + # Projection matrix. + K = np.array([[fx, 0.0, cx], [0.0, fy, cy], [0.0, 0.0, 1.0]]) + mat_proj = _calc_calib_proj( + K, 0, 0, self.width, self.height, clip_near, clip_far) + + @self.window.event + def on_draw(dt): + self.window.clear() + global curr_obj_id, mat_model, mat_view, mat_proj + + # Render the RGB image. + if self.render_rgb: + self.rgb = self._draw_rgb( + curr_obj_id, mat_model, mat_view, mat_proj) + + # Render the depth image. + if self.render_depth: + self.depth = self._draw_depth( + curr_obj_id, mat_model, mat_view, mat_proj) + + # The on_draw function is called framecount+1 times. + app.run(framecount=0) + + if self.mode == 'rgb': + return {'rgb': self.rgb} + elif self.mode == 'depth': + return {'depth': self.depth} + elif self.mode == 'rgb+depth': + return {'rgb': self.rgb, 'depth': self.depth} + + def _draw_rgb(self, obj_id, mat_model, mat_view, mat_proj): + """Renders an RGB image. + + :param obj_id: ID of the object model to render. + :param mat_model: 4x4 ndarray with the model matrix. + :param mat_view: 4x4 ndarray with the view matrix. + :param mat_proj: 4x4 ndarray with the projection matrix. + :return: HxWx3 ndarray with the rendered RGB image. + """ + # Update the OpenGL program. + program = self.rgb_programs[obj_id] + program['u_light_eye_pos'] = [0, 0, 0] # Camera origin. + program['u_light_ambient_w'] = self.light_ambient_weight + program['u_mv'] = _calc_model_view(mat_model, mat_view) + program['u_nm'] = _calc_normal_matrix(mat_model, mat_view) + program['u_mvp'] = _calc_model_view_proj(mat_model, mat_view, mat_proj) + + # OpenGL setup. + gl.glEnable(gl.GL_DEPTH_TEST) + gl.glClearColor( + self.bg_color[0], self.bg_color[1], self.bg_color[2], self.bg_color[3]) + gl.glClear(gl.GL_COLOR_BUFFER_BIT | gl.GL_DEPTH_BUFFER_BIT) + gl.glViewport(0, 0, self.width, self.height) + + # Keep the back-face culling disabled because of objects which do not have + # well-defined surface (e.g. the lamp from the lm dataset). + gl.glDisable(gl.GL_CULL_FACE) + + # Rendering. + program.draw(gl.GL_TRIANGLES, self.index_buffers[obj_id]) + + # Get the content of the FBO texture. + rgb = np.zeros((self.height, self.width, 4), dtype=np.float32) + gl.glReadPixels(0, 0, self.width, self.height, gl.GL_RGBA, gl.GL_FLOAT, rgb) + rgb.shape = (self.height, self.width, 4) + rgb = rgb[::-1, :] + rgb = np.round(rgb[:, :, :3] * 255).astype(np.uint8) # Convert to [0, 255]. + + return rgb + + def _draw_depth(self, obj_id, mat_model, mat_view, mat_proj): + """Renders a depth image. + + :param obj_id: ID of the object model to render. + :param mat_model: 4x4 ndarray with the model matrix. + :param mat_view: 4x4 ndarray with the view matrix. + :param mat_proj: 4x4 ndarray with the projection matrix. + :return: HxW ndarray with the rendered depth image. + """ + # Update the OpenGL program. + program = self.depth_programs[obj_id] + program['u_mv'] = _calc_model_view(mat_model, mat_view) + program['u_mvp'] = _calc_model_view_proj(mat_model, mat_view, mat_proj) + + # OpenGL setup. 
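+    # The eye-space depth is written by the shader into the red channel of the
+    # float32 color attachment (see _depth_fragment_code); the background is
+    # cleared to zero, so uncovered pixels read back as a depth of 0.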
+ gl.glEnable(gl.GL_DEPTH_TEST) + gl.glClearColor(0.0, 0.0, 0.0, 0.0) + gl.glClear(gl.GL_COLOR_BUFFER_BIT | gl.GL_DEPTH_BUFFER_BIT) + gl.glViewport(0, 0, self.width, self.height) + + # Keep the back-face culling disabled because of objects which do not have + # well-defined surface (e.g. the lamp from the lm dataset). + gl.glDisable(gl.GL_CULL_FACE) + + # Rendering. + program.draw(gl.GL_TRIANGLES, self.index_buffers[obj_id]) + + # Get the content of the FBO texture. + depth = np.zeros((self.height, self.width, 4), dtype=np.float32) + gl.glReadPixels( + 0, 0, self.width, self.height, gl.GL_RGBA, gl.GL_FLOAT, depth) + depth.shape = (self.height, self.width, 4) + depth = depth[::-1, :] + depth = depth[:, :, 0] # Depth is saved in the first channel + + return depth diff --git a/bop_toolkit_lib/score.py b/bop_toolkit_lib/score.py new file mode 100644 index 0000000..b9e6f91 --- /dev/null +++ b/bop_toolkit_lib/score.py @@ -0,0 +1,169 @@ +# Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) +# Center for Machine Perception, Czech Technical University in Prague + +"""Calculation of performance scores.""" + +import numpy as np +from collections import defaultdict + +from bop_toolkit_lib import misc + + +def ap(rec, pre): + """Calculates Average Precision (AP). + + Calculated in the PASCAL VOC challenge from 2010 onwards [1]: + 1) Compute a version of the measured precision/recall curve with precision + monotonically decreasing, by setting the precision for recall r to the + maximum precision obtained for any recall r' >= r. + 2) Compute the AP as the area under this curve by numerical integration. + No approximation is involved since the curve is piecewise constant. + + NOTE: The used AP formula is different from the one in [2] where the + formula from VLFeat [3] was presented - although it was mistakenly + introduced as a formula used in PASCAL. + + References: + [1] http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/devkit_doc.html#SECTION00044000000000000000 + [2] Hodan et al., "On Evaluation of 6D Object Pose Estimation", ECCVW 2016 + [3] http://www.vlfeat.org/matlab/vl_pr.html + + :param rec: A list (or 1D ndarray) of recall rates. + :param pre: A list (or 1D ndarray) of precision rates. + :return: Average Precision - the area under the monotonically decreasing + version of the precision/recall curve given by rec and pre. + """ + # Sorts the precision/recall points by increasing recall. + i = np.argsort(rec) + + mrec = np.concatenate(([0], np.array(rec)[i], [1])) + mpre = np.concatenate(([0], np.array(pre)[i], [0])) + assert (mrec.shape == mpre.shape) + for i in range(mpre.size - 3, -1, -1): + mpre[i] = max(mpre[i], mpre[i + 1]) + i = np.nonzero(mrec[1:] != mrec[:-1])[0] + 1 + ap = np.sum((mrec[i] - mrec[i - 1]) * mpre[i]) + return ap + + +def recall(tp_count, targets_count): + """Calculates recall. + + :param tp_count: Number of true positives. + :param targets_count: Number of targets. + :return: The recall rate. + """ + if targets_count == 0: + return 0.0 + else: + return tp_count / float(targets_count) + + +def calc_localization_scores(scene_ids, obj_ids, matches, n_top, do_print=True): + """Calculates performance scores for the 6D object localization task. + + References: + Hodan et al., BOP: Benchmark for 6D Object Pose Estimation, ECCV'18. + Hodan et al., On Evaluation of 6D Object Pose Estimation, ECCVW'16. + + :param scene_ids: ID's of considered scenes. + :param obj_ids: ID's of considered objects. 
+  :param matches: Info about matching pose estimates to ground-truth poses
+    (see pose_matching.py for details).
+  :param n_top: Number of top pose estimates to consider per test target.
+  :param do_print: Whether to print the scores to the standard output.
+  :return: Dictionary with the evaluation scores.
+  """
+  # Count the number of visible object instances in each image.
+  insts = {i: {j: defaultdict(lambda: 0) for j in scene_ids} for i in obj_ids}
+  for m in matches:
+    if m['valid']:
+      insts[m['obj_id']][m['scene_id']][m['im_id']] += 1
+
+  # Count the number of targets = object instances to be found.
+  # For SiSo, there is either zero or one target in each image - there is just
+  # one even if there are more instances of the object of interest.
+  tars = 0  # Total number of targets.
+  obj_tars = {i: 0 for i in obj_ids}  # Targets per object.
+  scene_tars = {i: 0 for i in scene_ids}  # Targets per scene.
+  for obj_id, obj_insts in insts.items():
+    for scene_id, scene_insts in obj_insts.items():
+
+      # Count the number of targets for the current object in the current scene.
+      if n_top > 0:
+        # list() is needed in Python 3, where dict.values() returns a view.
+        count = sum(np.minimum(n_top, list(scene_insts.values())))
+      else:
+        count = sum(scene_insts.values())
+
+      tars += count
+      obj_tars[obj_id] += count
+      scene_tars[scene_id] += count
+
+  # Count the number of true positives.
+  tps = 0  # Total number of true positives.
+  obj_tps = {i: 0 for i in obj_ids}  # True positives per object.
+  scene_tps = {i: 0 for i in scene_ids}  # True positives per scene.
+  for m in matches:
+    if m['valid'] and m['est_id'] != -1:
+      tps += 1
+      obj_tps[m['obj_id']] += 1
+      scene_tps[m['scene_id']] += 1
+
+  # Total recall.
+  total_recall = recall(tps, tars)
+
+  # Recall per object.
+  obj_recalls = {}
+  for i in obj_ids:
+    obj_recalls[i] = recall(obj_tps[i], obj_tars[i])
+  mean_obj_recall = float(np.mean(list(obj_recalls.values())).squeeze())
+
+  # Recall per scene.
+  scene_recalls = {}
+  for i in scene_ids:
+    scene_recalls[i] = float(recall(scene_tps[i], scene_tars[i]))
+  mean_scene_recall = float(np.mean(list(scene_recalls.values())).squeeze())
+
+  # Final scores.
+  scores = {
+    'total_recall': float(total_recall),
+    'obj_recalls': obj_recalls,
+    'mean_obj_recall': float(mean_obj_recall),
+    'scene_recalls': scene_recalls,
+    'mean_scene_recall': float(mean_scene_recall),
+    'gt_count': len(matches),
+    'targets_count': int(tars),
+    'tp_count': int(tps),
+  }
+
+  if do_print:
+    obj_recalls_str = ', '.join(
+      ['{}: {:.3f}'.format(i, s) for i, s in scores['obj_recalls'].items()])
+
+    scene_recalls_str = ', '.join(
+      ['{}: {:.3f}'.format(i, s) for i, s in scores['scene_recalls'].items()])
+
+    misc.log('')
+    misc.log('GT count: {:d}'.format(scores['gt_count']))
+    misc.log('Target count: {:d}'.format(scores['targets_count']))
+    misc.log('TP count: {:d}'.format(scores['tp_count']))
+    misc.log('Total recall: {:.4f}'.format(scores['total_recall']))
+    misc.log('Mean object recall: {:.4f}'.format(scores['mean_obj_recall']))
+    misc.log('Mean scene recall: {:.4f}'.format(scores['mean_scene_recall']))
+    misc.log('Object recalls:\n{}'.format(obj_recalls_str))
+    misc.log('Scene recalls:\n{}'.format(scene_recalls_str))
+    misc.log('')
+
+  return scores
+
+
+if __name__ == '__main__':
+
+  # AP test.
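+  # Toy example: tp marks which of six hypothetical detections (assumed to be
+  # ordered by decreasing confidence) are true positives; the cumulative counts
+  # yield the recall/precision points passed to ap().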
+ tp = np.array([False, True, True, False, True, False]) + fp = np.logical_not(tp) + tp_c = np.cumsum(tp).astype(np.float) + fp_c = np.cumsum(fp).astype(np.float) + rec = tp_c / tp.size + pre = tp_c / (fp_c + tp_c) + misc.log('Average Precision: ' + str(ap(rec, pre))) diff --git a/bop_toolkit_lib/transform.py b/bop_toolkit_lib/transform.py new file mode 100644 index 0000000..baf4d1d --- /dev/null +++ b/bop_toolkit_lib/transform.py @@ -0,0 +1,1917 @@ +# -*- coding: utf-8 -*- +# transform.py + +# Copyright (c) 2006-2015, Christoph Gohlke +# Copyright (c) 2006-2015, The Regents of the University of California +# Produced at the Laboratory for Fluorescence Dynamics +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the copyright holders nor the names of any +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +"""Homogeneous Transformation Matrices and Quaternions. + +A library for calculating 4x4 matrices for translating, rotating, reflecting, +scaling, shearing, projecting, orthogonalizing, and superimposing arrays of +3D homogeneous coordinates as well as for converting between rotation matrices, +Euler angles, and quaternions. Also includes an Arcball control object and +functions to decompose transformation matrices. + +:Author: + `Christoph Gohlke `_ + +:Organization: + Laboratory for Fluorescence Dynamics, University of California, Irvine + +:Version: 2015.03.19 + +Requirements +------------ +* `CPython 2.7 or 3.4 `_ +* `Numpy 1.9 `_ +* `Transformations.c 2015.03.19 `_ + (recommended for speedup of some functions) + +Notes +----- +The API is not stable yet and is expected to change between revisions. + +This Python code is not optimized for speed. Refer to the transformations.c +module for a faster implementation of some functions. + +Documentation in HTML format can be generated with epydoc. + +Matrices (M) can be inverted using numpy.linalg.inv(M), be concatenated using +numpy.dot(M0, M1), or transform homogeneous coordinate arrays (v) using +numpy.dot(M, v) for shape (4, \*) column vectors, respectively +numpy.dot(v, M.T) for shape (\*, 4) row vectors ("array of points"). 
+ +This module follows the "column vectors on the right" and "row major storage" +(C contiguous) conventions. The translation components are in the right column +of the transformation matrix, i.e. M[:3, 3]. +The transpose of the transformation matrices may have to be used to interface +with other graphics systems, e.g. with OpenGL's glMultMatrixd(). See also [16]. + +Calculations are carried out with numpy.float64 precision. + +Vector, point, quaternion, and matrix function arguments are expected to be +"array like", i.e. tuple, list, or numpy arrays. + +Return types are numpy arrays unless specified otherwise. + +Angles are in radians unless specified otherwise. + +Quaternions w+ix+jy+kz are represented as [w, x, y, z]. + +A triple of Euler angles can be applied/interpreted in 24 ways, which can +be specified using a 4 character string or encoded 4-tuple: + + *Axes 4-string*: e.g. 'sxyz' or 'ryxy' + + - first character : rotations are applied to 's'tatic or 'r'otating frame + - remaining characters : successive rotation axis 'x', 'y', or 'z' + + *Axes 4-tuple*: e.g. (0, 0, 0, 0) or (1, 1, 1, 1) + + - inner axis: code of axis ('x':0, 'y':1, 'z':2) of rightmost matrix. + - parity : even (0) if inner axis 'x' is followed by 'y', 'y' is followed + by 'z', or 'z' is followed by 'x'. Otherwise odd (1). + - repetition : first and last axis are same (1) or different (0). + - frame : rotations are applied to static (0) or rotating (1) frame. + +Other Python packages and modules for 3D transformations and quaternions: + +* `Transforms3d `_ + includes most code of this module. +* `Blender.mathutils `_ +* `numpy-dtypes `_ + +References +---------- +(1) Matrices and transformations. Ronald Goldman. + In "Graphics Gems I", pp 472-475. Morgan Kaufmann, 1990. +(2) More matrices and transformations: shear and pseudo-perspective. + Ronald Goldman. In "Graphics Gems II", pp 320-323. Morgan Kaufmann, 1991. +(3) Decomposing a matrix into simple transformations. Spencer Thomas. + In "Graphics Gems II", pp 320-323. Morgan Kaufmann, 1991. +(4) Recovering the data from the transformation matrix. Ronald Goldman. + In "Graphics Gems II", pp 324-331. Morgan Kaufmann, 1991. +(5) Euler angle conversion. Ken Shoemake. + In "Graphics Gems IV", pp 222-229. Morgan Kaufmann, 1994. +(6) Arcball rotation control. Ken Shoemake. + In "Graphics Gems IV", pp 175-192. Morgan Kaufmann, 1994. +(7) Representing attitude: Euler angles, unit quaternions, and rotation + vectors. James Diebel. 2006. +(8) A discussion of the solution for the best rotation to relate two sets + of vectors. W Kabsch. Acta Cryst. 1978. A34, 827-828. +(9) Closed-form solution of absolute orientation using unit quaternions. + BKP Horn. J Opt Soc Am A. 1987. 4(4):629-642. +(10) Quaternions. Ken Shoemake. + http://www.sfu.ca/~jwa3/cmpt461/files/quatut.pdf +(11) From quaternion to matrix and back. JMP van Waveren. 2005. + http://www.intel.com/cd/ids/developer/asmo-na/eng/293748.htm +(12) Uniform random rotations. Ken Shoemake. + In "Graphics Gems III", pp 124-132. Morgan Kaufmann, 1992. +(13) Quaternion in molecular modeling. CFF Karney. + J Mol Graph Mod, 25(5):595-604 +(14) New method for extracting the quaternion from a rotation matrix. + Itzhack Y Bar-Itzhack, J Guid Contr Dynam. 2000. 23(6): 1085-1087. +(15) Multiple View Geometry in Computer Vision. Hartley and Zissermann. + Cambridge University Press; 2nd Ed. 2004. Chapter 4, Algorithm 4.7, p 130. +(16) Column Vectors vs. Row Vectors. 
+ http://steve.hollasch.net/cgindex/math/matrix/column-vec.html + +Examples +-------- +>>> alpha, beta, gamma = 0.123, -1.234, 2.345 +>>> origin, xaxis, yaxis, zaxis = [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1] +>>> I = identity_matrix() +>>> Rx = rotation_matrix(alpha, xaxis) +>>> Ry = rotation_matrix(beta, yaxis) +>>> Rz = rotation_matrix(gamma, zaxis) +>>> R = concatenate_matrices(Rx, Ry, Rz) +>>> euler = euler_from_matrix(R, 'rxyz') +>>> numpy.allclose([alpha, beta, gamma], euler) +True +>>> Re = euler_matrix(alpha, beta, gamma, 'rxyz') +>>> is_same_transform(R, Re) +True +>>> al, be, ga = euler_from_matrix(Re, 'rxyz') +>>> is_same_transform(Re, euler_matrix(al, be, ga, 'rxyz')) +True +>>> qx = quaternion_about_axis(alpha, xaxis) +>>> qy = quaternion_about_axis(beta, yaxis) +>>> qz = quaternion_about_axis(gamma, zaxis) +>>> q = quaternion_multiply(qx, qy) +>>> q = quaternion_multiply(q, qz) +>>> Rq = quaternion_matrix(q) +>>> is_same_transform(R, Rq) +True +>>> S = scale_matrix(1.23, origin) +>>> T = translation_matrix([1, 2, 3]) +>>> Z = shear_matrix(beta, xaxis, origin, zaxis) +>>> R = random_rotation_matrix(numpy.random.rand(3)) +>>> M = concatenate_matrices(T, R, Z, S) +>>> scale, shear, angles, trans, persp = decompose_matrix(M) +>>> numpy.allclose(scale, 1.23) +True +>>> numpy.allclose(trans, [1, 2, 3]) +True +>>> numpy.allclose(shear, [0, math.tan(beta), 0]) +True +>>> is_same_transform(R, euler_matrix(axes='sxyz', *angles)) +True +>>> M1 = compose_matrix(scale, shear, angles, trans, persp) +>>> is_same_transform(M, M1) +True +>>> v0, v1 = random_vector(3), random_vector(3) +>>> M = rotation_matrix(angle_between_vectors(v0, v1), vector_product(v0, v1)) +>>> v2 = numpy.dot(v0, M[:3,:3].T) +>>> numpy.allclose(unit_vector(v1), unit_vector(v2)) +True + +""" + +from __future__ import division, print_function + +import math + +import numpy + +__version__ = '2015.03.19' +__docformat__ = 'restructuredtext en' +__all__ = () + + +def identity_matrix(): + """Return 4x4 identity/unit matrix. + + >>> I = identity_matrix() + >>> numpy.allclose(I, numpy.dot(I, I)) + True + >>> numpy.sum(I), numpy.trace(I) + (4.0, 4.0) + >>> numpy.allclose(I, numpy.identity(4)) + True + + """ + return numpy.identity(4) + + +def translation_matrix(direction): + """Return matrix to translate by direction vector. + + >>> v = numpy.random.random(3) - 0.5 + >>> numpy.allclose(v, translation_matrix(v)[:3, 3]) + True + + """ + M = numpy.identity(4) + M[:3, 3] = direction[:3] + return M + + +def translation_from_matrix(matrix): + """Return translation vector from translation matrix. + + >>> v0 = numpy.random.random(3) - 0.5 + >>> v1 = translation_from_matrix(translation_matrix(v0)) + >>> numpy.allclose(v0, v1) + True + + """ + return numpy.array(matrix, copy=False)[:3, 3].copy() + + +def reflection_matrix(point, normal): + """Return matrix to mirror at plane defined by point and normal vector. + + >>> v0 = numpy.random.random(4) - 0.5 + >>> v0[3] = 1. 
+ >>> v1 = numpy.random.random(3) - 0.5 + >>> R = reflection_matrix(v0, v1) + >>> numpy.allclose(2, numpy.trace(R)) + True + >>> numpy.allclose(v0, numpy.dot(R, v0)) + True + >>> v2 = v0.copy() + >>> v2[:3] += v1 + >>> v3 = v0.copy() + >>> v2[:3] -= v1 + >>> numpy.allclose(v2, numpy.dot(R, v3)) + True + + """ + normal = unit_vector(normal[:3]) + M = numpy.identity(4) + M[:3, :3] -= 2.0 * numpy.outer(normal, normal) + M[:3, 3] = (2.0 * numpy.dot(point[:3], normal)) * normal + return M + + +def reflection_from_matrix(matrix): + """Return mirror plane point and normal vector from reflection matrix. + + >>> v0 = numpy.random.random(3) - 0.5 + >>> v1 = numpy.random.random(3) - 0.5 + >>> M0 = reflection_matrix(v0, v1) + >>> point, normal = reflection_from_matrix(M0) + >>> M1 = reflection_matrix(point, normal) + >>> is_same_transform(M0, M1) + True + + """ + M = numpy.array(matrix, dtype=numpy.float64, copy=False) + # normal: unit eigenvector corresponding to eigenvalue -1 + w, V = numpy.linalg.eig(M[:3, :3]) + i = numpy.where(abs(numpy.real(w) + 1.0) < 1e-8)[0] + if not len(i): + raise ValueError("no unit eigenvector corresponding to eigenvalue -1") + normal = numpy.real(V[:, i[0]]).squeeze() + # point: any unit eigenvector corresponding to eigenvalue 1 + w, V = numpy.linalg.eig(M) + i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-8)[0] + if not len(i): + raise ValueError("no unit eigenvector corresponding to eigenvalue 1") + point = numpy.real(V[:, i[-1]]).squeeze() + point /= point[3] + return point, normal + + +def rotation_matrix(angle, direction, point=None): + """Return matrix to rotate about axis defined by point and direction. + + >>> R = rotation_matrix(math.pi/2, [0, 0, 1], [1, 0, 0]) + >>> numpy.allclose(numpy.dot(R, [0, 0, 0, 1]), [1, -1, 0, 1]) + True + >>> angle = (random.random() - 0.5) * (2*math.pi) + >>> direc = numpy.random.random(3) - 0.5 + >>> point = numpy.random.random(3) - 0.5 + >>> R0 = rotation_matrix(angle, direc, point) + >>> R1 = rotation_matrix(angle-2*math.pi, direc, point) + >>> is_same_transform(R0, R1) + True + >>> R0 = rotation_matrix(angle, direc, point) + >>> R1 = rotation_matrix(-angle, -direc, point) + >>> is_same_transform(R0, R1) + True + >>> I = numpy.identity(4, numpy.float64) + >>> numpy.allclose(I, rotation_matrix(math.pi*2, direc)) + True + >>> numpy.allclose(2, numpy.trace(rotation_matrix(math.pi/2, + ... direc, point))) + True + + """ + sina = math.sin(angle) + cosa = math.cos(angle) + direction = unit_vector(direction[:3]) + # rotation matrix around unit vector + R = numpy.diag([cosa, cosa, cosa]) + R += numpy.outer(direction, direction) * (1.0 - cosa) + direction *= sina + R += numpy.array([[0.0, -direction[2], direction[1]], + [direction[2], 0.0, -direction[0]], + [-direction[1], direction[0], 0.0]]) + M = numpy.identity(4) + M[:3, :3] = R + if point is not None: + # rotation not around origin + point = numpy.array(point[:3], dtype=numpy.float64, copy=False) + M[:3, 3] = point - numpy.dot(R, point) + return M + + +def rotation_from_matrix(matrix): + """Return rotation angle and axis from rotation matrix. 
+ + >>> angle = (random.random() - 0.5) * (2*math.pi) + >>> direc = numpy.random.random(3) - 0.5 + >>> point = numpy.random.random(3) - 0.5 + >>> R0 = rotation_matrix(angle, direc, point) + >>> angle, direc, point = rotation_from_matrix(R0) + >>> R1 = rotation_matrix(angle, direc, point) + >>> is_same_transform(R0, R1) + True + + """ + R = numpy.array(matrix, dtype=numpy.float64, copy=False) + R33 = R[:3, :3] + # direction: unit eigenvector of R33 corresponding to eigenvalue of 1 + w, W = numpy.linalg.eig(R33.T) + i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-8)[0] + if not len(i): + raise ValueError("no unit eigenvector corresponding to eigenvalue 1") + direction = numpy.real(W[:, i[-1]]).squeeze() + # point: unit eigenvector of R33 corresponding to eigenvalue of 1 + w, Q = numpy.linalg.eig(R) + i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-8)[0] + if not len(i): + raise ValueError("no unit eigenvector corresponding to eigenvalue 1") + point = numpy.real(Q[:, i[-1]]).squeeze() + point /= point[3] + # rotation angle depending on direction + cosa = (numpy.trace(R33) - 1.0) / 2.0 + if abs(direction[2]) > 1e-8: + sina = (R[1, 0] + (cosa - 1.0) * direction[0] * direction[1]) / direction[2] + elif abs(direction[1]) > 1e-8: + sina = (R[0, 2] + (cosa - 1.0) * direction[0] * direction[2]) / direction[1] + else: + sina = (R[2, 1] + (cosa - 1.0) * direction[1] * direction[2]) / direction[0] + angle = math.atan2(sina, cosa) + return angle, direction, point + + +def scale_matrix(factor, origin=None, direction=None): + """Return matrix to scale by factor around origin in direction. + + Use factor -1 for point symmetry. + + >>> v = (numpy.random.rand(4, 5) - 0.5) * 20 + >>> v[3] = 1 + >>> S = scale_matrix(-1.234) + >>> numpy.allclose(numpy.dot(S, v)[:3], -1.234*v[:3]) + True + >>> factor = random.random() * 10 - 5 + >>> origin = numpy.random.random(3) - 0.5 + >>> direct = numpy.random.random(3) - 0.5 + >>> S = scale_matrix(factor, origin) + >>> S = scale_matrix(factor, origin, direct) + + """ + if direction is None: + # uniform scaling + M = numpy.diag([factor, factor, factor, 1.0]) + if origin is not None: + M[:3, 3] = origin[:3] + M[:3, 3] *= 1.0 - factor + else: + # nonuniform scaling + direction = unit_vector(direction[:3]) + factor = 1.0 - factor + M = numpy.identity(4) + M[:3, :3] -= factor * numpy.outer(direction, direction) + if origin is not None: + M[:3, 3] = (factor * numpy.dot(origin[:3], direction)) * direction + return M + + +def scale_from_matrix(matrix): + """Return scaling factor, origin and direction from scaling matrix. 
+ + >>> factor = random.random() * 10 - 5 + >>> origin = numpy.random.random(3) - 0.5 + >>> direct = numpy.random.random(3) - 0.5 + >>> S0 = scale_matrix(factor, origin) + >>> factor, origin, direction = scale_from_matrix(S0) + >>> S1 = scale_matrix(factor, origin, direction) + >>> is_same_transform(S0, S1) + True + >>> S0 = scale_matrix(factor, origin, direct) + >>> factor, origin, direction = scale_from_matrix(S0) + >>> S1 = scale_matrix(factor, origin, direction) + >>> is_same_transform(S0, S1) + True + + """ + M = numpy.array(matrix, dtype=numpy.float64, copy=False) + M33 = M[:3, :3] + factor = numpy.trace(M33) - 2.0 + try: + # direction: unit eigenvector corresponding to eigenvalue factor + w, V = numpy.linalg.eig(M33) + i = numpy.where(abs(numpy.real(w) - factor) < 1e-8)[0][0] + direction = numpy.real(V[:, i]).squeeze() + direction /= vector_norm(direction) + except IndexError: + # uniform scaling + factor = (factor + 2.0) / 3.0 + direction = None + # origin: any eigenvector corresponding to eigenvalue 1 + w, V = numpy.linalg.eig(M) + i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-8)[0] + if not len(i): + raise ValueError("no eigenvector corresponding to eigenvalue 1") + origin = numpy.real(V[:, i[-1]]).squeeze() + origin /= origin[3] + return factor, origin, direction + + +def projection_matrix(point, normal, direction=None, + perspective=None, pseudo=False): + """Return matrix to project onto plane defined by point and normal. + + Using either perspective point, projection direction, or none of both. + + If pseudo is True, perspective projections will preserve relative depth + such that Perspective = dot(Orthogonal, PseudoPerspective). + + >>> P = projection_matrix([0, 0, 0], [1, 0, 0]) + >>> numpy.allclose(P[1:, 1:], numpy.identity(4)[1:, 1:]) + True + >>> point = numpy.random.random(3) - 0.5 + >>> normal = numpy.random.random(3) - 0.5 + >>> direct = numpy.random.random(3) - 0.5 + >>> persp = numpy.random.random(3) - 0.5 + >>> P0 = projection_matrix(point, normal) + >>> P1 = projection_matrix(point, normal, direction=direct) + >>> P2 = projection_matrix(point, normal, perspective=persp) + >>> P3 = projection_matrix(point, normal, perspective=persp, pseudo=True) + >>> is_same_transform(P2, numpy.dot(P0, P3)) + True + >>> P = projection_matrix([3, 0, 0], [1, 1, 0], [1, 0, 0]) + >>> v0 = (numpy.random.rand(4, 5) - 0.5) * 20 + >>> v0[3] = 1 + >>> v1 = numpy.dot(P, v0) + >>> numpy.allclose(v1[1], v0[1]) + True + >>> numpy.allclose(v1[0], 3-v1[1]) + True + + """ + M = numpy.identity(4) + point = numpy.array(point[:3], dtype=numpy.float64, copy=False) + normal = unit_vector(normal[:3]) + if perspective is not None: + # perspective projection + perspective = numpy.array(perspective[:3], dtype=numpy.float64, + copy=False) + M[0, 0] = M[1, 1] = M[2, 2] = numpy.dot(perspective - point, normal) + M[:3, :3] -= numpy.outer(perspective, normal) + if pseudo: + # preserve relative depth + M[:3, :3] -= numpy.outer(normal, normal) + M[:3, 3] = numpy.dot(point, normal) * (perspective + normal) + else: + M[:3, 3] = numpy.dot(point, normal) * perspective + M[3, :3] = -normal + M[3, 3] = numpy.dot(perspective, normal) + elif direction is not None: + # parallel projection + direction = numpy.array(direction[:3], dtype=numpy.float64, copy=False) + scale = numpy.dot(direction, normal) + M[:3, :3] -= numpy.outer(direction, normal) / scale + M[:3, 3] = direction * (numpy.dot(point, normal) / scale) + else: + # orthogonal projection + M[:3, :3] -= numpy.outer(normal, normal) + M[:3, 3] = numpy.dot(point, 
normal) * normal + return M + + +def projection_from_matrix(matrix, pseudo=False): + """Return projection plane and perspective point from projection matrix. + + Return values are same as arguments for projection_matrix function: + point, normal, direction, perspective, and pseudo. + + >>> point = numpy.random.random(3) - 0.5 + >>> normal = numpy.random.random(3) - 0.5 + >>> direct = numpy.random.random(3) - 0.5 + >>> persp = numpy.random.random(3) - 0.5 + >>> P0 = projection_matrix(point, normal) + >>> result = projection_from_matrix(P0) + >>> P1 = projection_matrix(*result) + >>> is_same_transform(P0, P1) + True + >>> P0 = projection_matrix(point, normal, direct) + >>> result = projection_from_matrix(P0) + >>> P1 = projection_matrix(*result) + >>> is_same_transform(P0, P1) + True + >>> P0 = projection_matrix(point, normal, perspective=persp, pseudo=False) + >>> result = projection_from_matrix(P0, pseudo=False) + >>> P1 = projection_matrix(*result) + >>> is_same_transform(P0, P1) + True + >>> P0 = projection_matrix(point, normal, perspective=persp, pseudo=True) + >>> result = projection_from_matrix(P0, pseudo=True) + >>> P1 = projection_matrix(*result) + >>> is_same_transform(P0, P1) + True + + """ + M = numpy.array(matrix, dtype=numpy.float64, copy=False) + M33 = M[:3, :3] + w, V = numpy.linalg.eig(M) + i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-8)[0] + if not pseudo and len(i): + # point: any eigenvector corresponding to eigenvalue 1 + point = numpy.real(V[:, i[-1]]).squeeze() + point /= point[3] + # direction: unit eigenvector corresponding to eigenvalue 0 + w, V = numpy.linalg.eig(M33) + i = numpy.where(abs(numpy.real(w)) < 1e-8)[0] + if not len(i): + raise ValueError("no eigenvector corresponding to eigenvalue 0") + direction = numpy.real(V[:, i[0]]).squeeze() + direction /= vector_norm(direction) + # normal: unit eigenvector of M33.T corresponding to eigenvalue 0 + w, V = numpy.linalg.eig(M33.T) + i = numpy.where(abs(numpy.real(w)) < 1e-8)[0] + if len(i): + # parallel projection + normal = numpy.real(V[:, i[0]]).squeeze() + normal /= vector_norm(normal) + return point, normal, direction, None, False + else: + # orthogonal projection, where normal equals direction vector + return point, direction, None, None, False + else: + # perspective projection + i = numpy.where(abs(numpy.real(w)) > 1e-8)[0] + if not len(i): + raise ValueError( + "no eigenvector not corresponding to eigenvalue 0") + point = numpy.real(V[:, i[-1]]).squeeze() + point /= point[3] + normal = - M[3, :3] + perspective = M[:3, 3] / numpy.dot(point[:3], normal) + if pseudo: + perspective -= normal + return point, normal, None, perspective, pseudo + + +def clip_matrix(left, right, bottom, top, near, far, perspective=False): + """Return matrix to obtain normalized device coordinates from frustum. + + The frustum bounds are axis-aligned along x (left, right), + y (bottom, top) and z (near, far). + + Normalized device coordinates are in range [-1, 1] if coordinates are + inside the frustum. + + If perspective is True the frustum is a truncated pyramid with the + perspective point at origin and direction along z axis, otherwise an + orthographic canonical view volume (a box). + + Homogeneous coordinates transformed by the perspective clip matrix + need to be dehomogenized (divided by w coordinate). 
+ + >>> frustum = numpy.random.rand(6) + >>> frustum[1] += frustum[0] + >>> frustum[3] += frustum[2] + >>> frustum[5] += frustum[4] + >>> M = clip_matrix(perspective=False, *frustum) + >>> numpy.dot(M, [frustum[0], frustum[2], frustum[4], 1]) + array([-1., -1., -1., 1.]) + >>> numpy.dot(M, [frustum[1], frustum[3], frustum[5], 1]) + array([ 1., 1., 1., 1.]) + >>> M = clip_matrix(perspective=True, *frustum) + >>> v = numpy.dot(M, [frustum[0], frustum[2], frustum[4], 1]) + >>> v / v[3] + array([-1., -1., -1., 1.]) + >>> v = numpy.dot(M, [frustum[1], frustum[3], frustum[4], 1]) + >>> v / v[3] + array([ 1., 1., -1., 1.]) + + """ + if left >= right or bottom >= top or near >= far: + raise ValueError("invalid frustum") + if perspective: + if near <= _EPS: + raise ValueError("invalid frustum: near <= 0") + t = 2.0 * near + M = [[t / (left - right), 0.0, (right + left) / (right - left), 0.0], + [0.0, t / (bottom - top), (top + bottom) / (top - bottom), 0.0], + [0.0, 0.0, (far + near) / (near - far), t * far / (far - near)], + [0.0, 0.0, -1.0, 0.0]] + else: + M = [[2.0 / (right - left), 0.0, 0.0, (right + left) / (left - right)], + [0.0, 2.0 / (top - bottom), 0.0, (top + bottom) / (bottom - top)], + [0.0, 0.0, 2.0 / (far - near), (far + near) / (near - far)], + [0.0, 0.0, 0.0, 1.0]] + return numpy.array(M) + + +def shear_matrix(angle, direction, point, normal): + """Return matrix to shear by angle along direction vector on shear plane. + + The shear plane is defined by a point and normal vector. The direction + vector must be orthogonal to the plane's normal vector. + + A point P is transformed by the shear matrix into P" such that + the vector P-P" is parallel to the direction vector and its extent is + given by the angle of P-P'-P", where P' is the orthogonal projection + of P onto the shear plane. + + >>> angle = (random.random() - 0.5) * 4*math.pi + >>> direct = numpy.random.random(3) - 0.5 + >>> point = numpy.random.random(3) - 0.5 + >>> normal = numpy.cross(direct, numpy.random.random(3)) + >>> S = shear_matrix(angle, direct, point, normal) + >>> numpy.allclose(1, numpy.linalg.det(S)) + True + + """ + normal = unit_vector(normal[:3]) + direction = unit_vector(direction[:3]) + if abs(numpy.dot(normal, direction)) > 1e-6: + raise ValueError("direction and normal vectors are not orthogonal") + angle = math.tan(angle) + M = numpy.identity(4) + M[:3, :3] += angle * numpy.outer(direction, normal) + M[:3, 3] = -angle * numpy.dot(point[:3], normal) * direction + return M + + +def shear_from_matrix(matrix): + """Return shear angle, direction and plane from shear matrix. 
+ + >>> angle = (random.random() - 0.5) * 4*math.pi + >>> direct = numpy.random.random(3) - 0.5 + >>> point = numpy.random.random(3) - 0.5 + >>> normal = numpy.cross(direct, numpy.random.random(3)) + >>> S0 = shear_matrix(angle, direct, point, normal) + >>> angle, direct, point, normal = shear_from_matrix(S0) + >>> S1 = shear_matrix(angle, direct, point, normal) + >>> is_same_transform(S0, S1) + True + + """ + M = numpy.array(matrix, dtype=numpy.float64, copy=False) + M33 = M[:3, :3] + # normal: cross independent eigenvectors corresponding to the eigenvalue 1 + w, V = numpy.linalg.eig(M33) + i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-4)[0] + if len(i) < 2: + raise ValueError("no two linear independent eigenvectors found %s" % w) + V = numpy.real(V[:, i]).squeeze().T + lenorm = -1.0 + for i0, i1 in ((0, 1), (0, 2), (1, 2)): + n = numpy.cross(V[i0], V[i1]) + w = vector_norm(n) + if w > lenorm: + lenorm = w + normal = n + normal /= lenorm + # direction and angle + direction = numpy.dot(M33 - numpy.identity(3), normal) + angle = vector_norm(direction) + direction /= angle + angle = math.atan(angle) + # point: eigenvector corresponding to eigenvalue 1 + w, V = numpy.linalg.eig(M) + i = numpy.where(abs(numpy.real(w) - 1.0) < 1e-8)[0] + if not len(i): + raise ValueError("no eigenvector corresponding to eigenvalue 1") + point = numpy.real(V[:, i[-1]]).squeeze() + point /= point[3] + return angle, direction, point, normal + + +def decompose_matrix(matrix): + """Return sequence of transformations from transformation matrix. + + matrix : array_like + Non-degenerative homogeneous transformation matrix + + Return tuple of: + scale : vector of 3 scaling factors + shear : list of shear factors for x-y, x-z, y-z axes + angles : list of Euler angles about static x, y, z axes + translate : translation vector along x, y, z axes + perspective : perspective partition of matrix + + Raise ValueError if matrix is of wrong type or degenerative. 
+ + >>> T0 = translation_matrix([1, 2, 3]) + >>> scale, shear, angles, trans, persp = decompose_matrix(T0) + >>> T1 = translation_matrix(trans) + >>> numpy.allclose(T0, T1) + True + >>> S = scale_matrix(0.123) + >>> scale, shear, angles, trans, persp = decompose_matrix(S) + >>> scale[0] + 0.123 + >>> R0 = euler_matrix(1, 2, 3) + >>> scale, shear, angles, trans, persp = decompose_matrix(R0) + >>> R1 = euler_matrix(*angles) + >>> numpy.allclose(R0, R1) + True + + """ + M = numpy.array(matrix, dtype=numpy.float64, copy=True).T + if abs(M[3, 3]) < _EPS: + raise ValueError("M[3, 3] is zero") + M /= M[3, 3] + P = M.copy() + P[:, 3] = 0.0, 0.0, 0.0, 1.0 + if not numpy.linalg.det(P): + raise ValueError("matrix is singular") + + scale = numpy.zeros((3,)) + shear = [0.0, 0.0, 0.0] + angles = [0.0, 0.0, 0.0] + + if any(abs(M[:3, 3]) > _EPS): + perspective = numpy.dot(M[:, 3], numpy.linalg.inv(P.T)) + M[:, 3] = 0.0, 0.0, 0.0, 1.0 + else: + perspective = numpy.array([0.0, 0.0, 0.0, 1.0]) + + translate = M[3, :3].copy() + M[3, :3] = 0.0 + + row = M[:3, :3].copy() + scale[0] = vector_norm(row[0]) + row[0] /= scale[0] + shear[0] = numpy.dot(row[0], row[1]) + row[1] -= row[0] * shear[0] + scale[1] = vector_norm(row[1]) + row[1] /= scale[1] + shear[0] /= scale[1] + shear[1] = numpy.dot(row[0], row[2]) + row[2] -= row[0] * shear[1] + shear[2] = numpy.dot(row[1], row[2]) + row[2] -= row[1] * shear[2] + scale[2] = vector_norm(row[2]) + row[2] /= scale[2] + shear[1:] /= scale[2] + + if numpy.dot(row[0], numpy.cross(row[1], row[2])) < 0: + numpy.negative(scale, scale) + numpy.negative(row, row) + + angles[1] = math.asin(-row[0, 2]) + if math.cos(angles[1]): + angles[0] = math.atan2(row[1, 2], row[2, 2]) + angles[2] = math.atan2(row[0, 1], row[0, 0]) + else: + # angles[0] = math.atan2(row[1, 0], row[1, 1]) + angles[0] = math.atan2(-row[2, 1], row[1, 1]) + angles[2] = 0.0 + + return scale, shear, angles, translate, perspective + + +def compose_matrix(scale=None, shear=None, angles=None, translate=None, + perspective=None): + """Return transformation matrix from sequence of transformations. + + This is the inverse of the decompose_matrix function. 
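+
+    The partial transformations are multiplied in the order perspective,
+    translation, rotation (static 'sxyz' Euler angles), shear and scale,
+    i.e. M = P * T * R * Z * S, so a column vector is scaled first and the
+    perspective partition is applied last. For example, a pure translation
+    ends up directly in the last column:
+
+    >>> M = compose_matrix(translate=[1, 2, 3])
+    >>> numpy.allclose(M[:3, 3], [1, 2, 3])
+    True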
+ + Sequence of transformations: + scale : vector of 3 scaling factors + shear : list of shear factors for x-y, x-z, y-z axes + angles : list of Euler angles about static x, y, z axes + translate : translation vector along x, y, z axes + perspective : perspective partition of matrix + + >>> scale = numpy.random.random(3) - 0.5 + >>> shear = numpy.random.random(3) - 0.5 + >>> angles = (numpy.random.random(3) - 0.5) * (2*math.pi) + >>> trans = numpy.random.random(3) - 0.5 + >>> persp = numpy.random.random(4) - 0.5 + >>> M0 = compose_matrix(scale, shear, angles, trans, persp) + >>> result = decompose_matrix(M0) + >>> M1 = compose_matrix(*result) + >>> is_same_transform(M0, M1) + True + + """ + M = numpy.identity(4) + if perspective is not None: + P = numpy.identity(4) + P[3, :] = perspective[:4] + M = numpy.dot(M, P) + if translate is not None: + T = numpy.identity(4) + T[:3, 3] = translate[:3] + M = numpy.dot(M, T) + if angles is not None: + R = euler_matrix(angles[0], angles[1], angles[2], 'sxyz') + M = numpy.dot(M, R) + if shear is not None: + Z = numpy.identity(4) + Z[1, 2] = shear[2] + Z[0, 2] = shear[1] + Z[0, 1] = shear[0] + M = numpy.dot(M, Z) + if scale is not None: + S = numpy.identity(4) + S[0, 0] = scale[0] + S[1, 1] = scale[1] + S[2, 2] = scale[2] + M = numpy.dot(M, S) + M /= M[3, 3] + return M + + +def orthogonalization_matrix(lengths, angles): + """Return orthogonalization matrix for crystallographic cell coordinates. + + Angles are expected in degrees. + + The de-orthogonalization matrix is the inverse. + + >>> O = orthogonalization_matrix([10, 10, 10], [90, 90, 90]) + >>> numpy.allclose(O[:3, :3], numpy.identity(3, float) * 10) + True + >>> O = orthogonalization_matrix([9.8, 12.0, 15.5], [87.2, 80.7, 69.7]) + >>> numpy.allclose(numpy.sum(O), 43.063229) + True + + """ + a, b, c = lengths + angles = numpy.radians(angles) + sina, sinb, _ = numpy.sin(angles) + cosa, cosb, cosg = numpy.cos(angles) + co = (cosa * cosb - cosg) / (sina * sinb) + return numpy.array([ + [a * sinb * math.sqrt(1.0 - co * co), 0.0, 0.0, 0.0], + [-a * sinb * co, b * sina, 0.0, 0.0], + [a * cosb, b * cosa, c, 0.0], + [0.0, 0.0, 0.0, 1.0]]) + + +def affine_matrix_from_points(v0, v1, shear=True, scale=True, usesvd=True): + """Return affine transform matrix to register two point sets. + + v0 and v1 are shape (ndims, \*) arrays of at least ndims non-homogeneous + coordinates, where ndims is the dimensionality of the coordinate space. + + If shear is False, a similarity transformation matrix is returned. + If also scale is False, a rigid/Euclidean transformation matrix + is returned. + + By default the algorithm by Hartley and Zissermann [15] is used. + If usesvd is True, similarity and Euclidean transformation matrices + are calculated by minimizing the weighted sum of squared deviations + (RMSD) according to the algorithm by Kabsch [8]. + Otherwise, and if ndims is 3, the quaternion based algorithm by Horn [9] + is used, which is slower when using this Python implementation. + + The returned matrix performs rotation, translation and uniform scaling + (if specified). + + >>> v0 = [[0, 1031, 1031, 0], [0, 0, 1600, 1600]] + >>> v1 = [[675, 826, 826, 677], [55, 52, 281, 277]] + >>> affine_matrix_from_points(v0, v1) + array([[ 0.14549, 0.00062, 675.50008], + [ 0.00048, 0.14094, 53.24971], + [ 0. , 0. , 1. 
]]) + >>> T = translation_matrix(numpy.random.random(3)-0.5) + >>> R = random_rotation_matrix(numpy.random.random(3)) + >>> S = scale_matrix(random.random()) + >>> M = concatenate_matrices(T, R, S) + >>> v0 = (numpy.random.rand(4, 100) - 0.5) * 20 + >>> v0[3] = 1 + >>> v1 = numpy.dot(M, v0) + >>> v0[:3] += numpy.random.normal(0, 1e-8, 300).reshape(3, -1) + >>> M = affine_matrix_from_points(v0[:3], v1[:3]) + >>> numpy.allclose(v1, numpy.dot(M, v0)) + True + + More examples in superimposition_matrix() + + """ + v0 = numpy.array(v0, dtype=numpy.float64, copy=True) + v1 = numpy.array(v1, dtype=numpy.float64, copy=True) + + ndims = v0.shape[0] + if ndims < 2 or v0.shape[1] < ndims or v0.shape != v1.shape: + raise ValueError("input arrays are of wrong shape or type") + + # move centroids to origin + t0 = -numpy.mean(v0, axis=1) + M0 = numpy.identity(ndims + 1) + M0[:ndims, ndims] = t0 + v0 += t0.reshape(ndims, 1) + t1 = -numpy.mean(v1, axis=1) + M1 = numpy.identity(ndims + 1) + M1[:ndims, ndims] = t1 + v1 += t1.reshape(ndims, 1) + + if shear: + # Affine transformation + A = numpy.concatenate((v0, v1), axis=0) + u, s, vh = numpy.linalg.svd(A.T) + vh = vh[:ndims].T + B = vh[:ndims] + C = vh[ndims:2 * ndims] + t = numpy.dot(C, numpy.linalg.pinv(B)) + t = numpy.concatenate((t, numpy.zeros((ndims, 1))), axis=1) + M = numpy.vstack((t, ((0.0,) * ndims) + (1.0,))) + elif usesvd or ndims != 3: + # Rigid transformation via SVD of covariance matrix + u, s, vh = numpy.linalg.svd(numpy.dot(v1, v0.T)) + # rotation matrix from SVD orthonormal bases + R = numpy.dot(u, vh) + if numpy.linalg.det(R) < 0.0: + # R does not constitute right handed system + R -= numpy.outer(u[:, ndims - 1], vh[ndims - 1, :] * 2.0) + s[-1] *= -1.0 + # homogeneous transformation matrix + M = numpy.identity(ndims + 1) + M[:ndims, :ndims] = R + else: + # Rigid transformation matrix via quaternion + # compute symmetric matrix N + xx, yy, zz = numpy.sum(v0 * v1, axis=1) + xy, yz, zx = numpy.sum(v0 * numpy.roll(v1, -1, axis=0), axis=1) + xz, yx, zy = numpy.sum(v0 * numpy.roll(v1, -2, axis=0), axis=1) + N = [[xx + yy + zz, 0.0, 0.0, 0.0], + [yz - zy, xx - yy - zz, 0.0, 0.0], + [zx - xz, xy + yx, yy - xx - zz, 0.0], + [xy - yx, zx + xz, yz + zy, zz - xx - yy]] + # quaternion: eigenvector corresponding to most positive eigenvalue + w, V = numpy.linalg.eigh(N) + q = V[:, numpy.argmax(w)] + q /= vector_norm(q) # unit quaternion + # homogeneous transformation matrix + M = quaternion_matrix(q) + + if scale and not shear: + # Affine transformation; scale is ratio of RMS deviations from centroid + v0 *= v0 + v1 *= v1 + M[:ndims, :ndims] *= math.sqrt(numpy.sum(v1) / numpy.sum(v0)) + + # move centroids back + M = numpy.dot(numpy.linalg.inv(M1), numpy.dot(M, M0)) + M /= M[ndims, ndims] + return M + + +def superimposition_matrix(v0, v1, scale=False, usesvd=True): + """Return matrix to transform given 3D point set into second point set. + + v0 and v1 are shape (3, \*) or (4, \*) arrays of at least 3 points. + + The parameters scale and usesvd are explained in the more general + affine_matrix_from_points function. + + The returned matrix is a similarity or Euclidean transformation matrix. + This function has a fast C implementation in transformations.c. 
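+
+    Note that the accelerated C implementation is not used in this copy of
+    the module: the _import_module('_transformations') call at the end of
+    the file is commented out, so the pure Python implementation is always
+    used.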
+ + >>> v0 = numpy.random.rand(3, 10) + >>> M = superimposition_matrix(v0, v0) + >>> numpy.allclose(M, numpy.identity(4)) + True + >>> R = random_rotation_matrix(numpy.random.random(3)) + >>> v0 = [[1,0,0], [0,1,0], [0,0,1], [1,1,1]] + >>> v1 = numpy.dot(R, v0) + >>> M = superimposition_matrix(v0, v1) + >>> numpy.allclose(v1, numpy.dot(M, v0)) + True + >>> v0 = (numpy.random.rand(4, 100) - 0.5) * 20 + >>> v0[3] = 1 + >>> v1 = numpy.dot(R, v0) + >>> M = superimposition_matrix(v0, v1) + >>> numpy.allclose(v1, numpy.dot(M, v0)) + True + >>> S = scale_matrix(random.random()) + >>> T = translation_matrix(numpy.random.random(3)-0.5) + >>> M = concatenate_matrices(T, R, S) + >>> v1 = numpy.dot(M, v0) + >>> v0[:3] += numpy.random.normal(0, 1e-9, 300).reshape(3, -1) + >>> M = superimposition_matrix(v0, v1, scale=True) + >>> numpy.allclose(v1, numpy.dot(M, v0)) + True + >>> M = superimposition_matrix(v0, v1, scale=True, usesvd=False) + >>> numpy.allclose(v1, numpy.dot(M, v0)) + True + >>> v = numpy.empty((4, 100, 3)) + >>> v[:, :, 0] = v0 + >>> M = superimposition_matrix(v0, v1, scale=True, usesvd=False) + >>> numpy.allclose(v1, numpy.dot(M, v[:, :, 0])) + True + + """ + v0 = numpy.array(v0, dtype=numpy.float64, copy=False)[:3] + v1 = numpy.array(v1, dtype=numpy.float64, copy=False)[:3] + return affine_matrix_from_points(v0, v1, shear=False, + scale=scale, usesvd=usesvd) + + +def euler_matrix(ai, aj, ak, axes='sxyz'): + """Return homogeneous rotation matrix from Euler angles and axis sequence. + + ai, aj, ak : Euler's roll, pitch and yaw angles + axes : One of 24 axis sequences as string or encoded tuple + + >>> R = euler_matrix(1, 2, 3, 'syxz') + >>> numpy.allclose(numpy.sum(R[0]), -1.34786452) + True + >>> R = euler_matrix(1, 2, 3, (0, 1, 0, 1)) + >>> numpy.allclose(numpy.sum(R[0]), -0.383436184) + True + >>> ai, aj, ak = (4*math.pi) * (numpy.random.random(3) - 0.5) + >>> for axes in _AXES2TUPLE.keys(): + ... R = euler_matrix(ai, aj, ak, axes) + >>> for axes in _TUPLE2AXES.keys(): + ... R = euler_matrix(ai, aj, ak, axes) + + """ + try: + firstaxis, parity, repetition, frame = _AXES2TUPLE[axes] + except (AttributeError, KeyError): + _TUPLE2AXES[axes] # validation + firstaxis, parity, repetition, frame = axes + + i = firstaxis + j = _NEXT_AXIS[i + parity] + k = _NEXT_AXIS[i - parity + 1] + + if frame: + ai, ak = ak, ai + if parity: + ai, aj, ak = -ai, -aj, -ak + + si, sj, sk = math.sin(ai), math.sin(aj), math.sin(ak) + ci, cj, ck = math.cos(ai), math.cos(aj), math.cos(ak) + cc, cs = ci * ck, ci * sk + sc, ss = si * ck, si * sk + + M = numpy.identity(4) + if repetition: + M[i, i] = cj + M[i, j] = sj * si + M[i, k] = sj * ci + M[j, i] = sj * sk + M[j, j] = -cj * ss + cc + M[j, k] = -cj * cs - sc + M[k, i] = -sj * ck + M[k, j] = cj * sc + cs + M[k, k] = cj * cc - ss + else: + M[i, i] = cj * ck + M[i, j] = sj * sc - cs + M[i, k] = sj * cc + ss + M[j, i] = cj * sk + M[j, j] = sj * ss + cc + M[j, k] = sj * cs - sc + M[k, i] = -sj + M[k, j] = cj * si + M[k, k] = cj * ci + return M + + +def euler_from_matrix(matrix, axes='sxyz'): + """Return Euler angles from rotation matrix for specified axis sequence. + + axes : One of 24 axis sequences as string or encoded tuple + + Note that many Euler angle triplets can describe one matrix. + + >>> R0 = euler_matrix(1, 2, 3, 'syxz') + >>> al, be, ga = euler_from_matrix(R0, 'syxz') + >>> R1 = euler_matrix(al, be, ga, 'syxz') + >>> numpy.allclose(R0, R1) + True + >>> angles = (4*math.pi) * (numpy.random.random(3) - 0.5) + >>> for axes in _AXES2TUPLE.keys(): + ... 
R0 = euler_matrix(axes=axes, *angles) + ... R1 = euler_matrix(axes=axes, *euler_from_matrix(R0, axes)) + ... if not numpy.allclose(R0, R1): print(axes, "failed") + + """ + try: + firstaxis, parity, repetition, frame = _AXES2TUPLE[axes.lower()] + except (AttributeError, KeyError): + _TUPLE2AXES[axes] # validation + firstaxis, parity, repetition, frame = axes + + i = firstaxis + j = _NEXT_AXIS[i + parity] + k = _NEXT_AXIS[i - parity + 1] + + M = numpy.array(matrix, dtype=numpy.float64, copy=False)[:3, :3] + if repetition: + sy = math.sqrt(M[i, j] * M[i, j] + M[i, k] * M[i, k]) + if sy > _EPS: + ax = math.atan2(M[i, j], M[i, k]) + ay = math.atan2(sy, M[i, i]) + az = math.atan2(M[j, i], -M[k, i]) + else: + ax = math.atan2(-M[j, k], M[j, j]) + ay = math.atan2(sy, M[i, i]) + az = 0.0 + else: + cy = math.sqrt(M[i, i] * M[i, i] + M[j, i] * M[j, i]) + if cy > _EPS: + ax = math.atan2(M[k, j], M[k, k]) + ay = math.atan2(-M[k, i], cy) + az = math.atan2(M[j, i], M[i, i]) + else: + ax = math.atan2(-M[j, k], M[j, j]) + ay = math.atan2(-M[k, i], cy) + az = 0.0 + + if parity: + ax, ay, az = -ax, -ay, -az + if frame: + ax, az = az, ax + return ax, ay, az + + +def euler_from_quaternion(quaternion, axes='sxyz'): + """Return Euler angles from quaternion for specified axis sequence. + + >>> angles = euler_from_quaternion([0.99810947, 0.06146124, 0, 0]) + >>> numpy.allclose(angles, [0.123, 0, 0]) + True + + """ + return euler_from_matrix(quaternion_matrix(quaternion), axes) + + +def quaternion_from_euler(ai, aj, ak, axes='sxyz'): + """Return quaternion from Euler angles and axis sequence. + + ai, aj, ak : Euler's roll, pitch and yaw angles + axes : One of 24 axis sequences as string or encoded tuple + + >>> q = quaternion_from_euler(1, 2, 3, 'ryxz') + >>> numpy.allclose(q, [0.435953, 0.310622, -0.718287, 0.444435]) + True + + """ + try: + firstaxis, parity, repetition, frame = _AXES2TUPLE[axes.lower()] + except (AttributeError, KeyError): + _TUPLE2AXES[axes] # validation + firstaxis, parity, repetition, frame = axes + + i = firstaxis + 1 + j = _NEXT_AXIS[i + parity - 1] + 1 + k = _NEXT_AXIS[i - parity] + 1 + + if frame: + ai, ak = ak, ai + if parity: + aj = -aj + + ai /= 2.0 + aj /= 2.0 + ak /= 2.0 + ci = math.cos(ai) + si = math.sin(ai) + cj = math.cos(aj) + sj = math.sin(aj) + ck = math.cos(ak) + sk = math.sin(ak) + cc = ci * ck + cs = ci * sk + sc = si * ck + ss = si * sk + + q = numpy.empty((4,)) + if repetition: + q[0] = cj * (cc - ss) + q[i] = cj * (cs + sc) + q[j] = sj * (cc + ss) + q[k] = sj * (cs - sc) + else: + q[0] = cj * cc + sj * ss + q[i] = cj * sc - sj * cs + q[j] = cj * ss + sj * cc + q[k] = cj * cs - sj * sc + if parity: + q[j] *= -1.0 + + return q + + +def quaternion_about_axis(angle, axis): + """Return quaternion for rotation about axis. + + >>> q = quaternion_about_axis(0.123, [1, 0, 0]) + >>> numpy.allclose(q, [0.99810947, 0.06146124, 0, 0]) + True + + """ + q = numpy.array([0.0, axis[0], axis[1], axis[2]]) + qlen = vector_norm(q) + if qlen > _EPS: + q *= math.sin(angle / 2.0) / qlen + q[0] = math.cos(angle / 2.0) + return q + + +def quaternion_matrix(quaternion): + """Return homogeneous rotation matrix from quaternion. 
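+
+    The quaternion is expected in [w, x, y, z] order and does not need to be
+    normalized; quaternions with a norm close to zero yield the identity
+    matrix.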
+ + >>> M = quaternion_matrix([0.99810947, 0.06146124, 0, 0]) + >>> numpy.allclose(M, rotation_matrix(0.123, [1, 0, 0])) + True + >>> M = quaternion_matrix([1, 0, 0, 0]) + >>> numpy.allclose(M, numpy.identity(4)) + True + >>> M = quaternion_matrix([0, 1, 0, 0]) + >>> numpy.allclose(M, numpy.diag([1, -1, -1, 1])) + True + + """ + q = numpy.array(quaternion, dtype=numpy.float64, copy=True) + n = numpy.dot(q, q) + if n < _EPS: + return numpy.identity(4) + q *= math.sqrt(2.0 / n) + q = numpy.outer(q, q) + return numpy.array([ + [1.0 - q[2, 2] - q[3, 3], q[1, 2] - q[3, 0], q[1, 3] + q[2, 0], 0.0], + [q[1, 2] + q[3, 0], 1.0 - q[1, 1] - q[3, 3], q[2, 3] - q[1, 0], 0.0], + [q[1, 3] - q[2, 0], q[2, 3] + q[1, 0], 1.0 - q[1, 1] - q[2, 2], 0.0], + [0.0, 0.0, 0.0, 1.0]]) + + +def quaternion_from_matrix(matrix, isprecise=False): + """Return quaternion from rotation matrix. + + If isprecise is True, the input matrix is assumed to be a precise rotation + matrix and a faster algorithm is used. + + >>> q = quaternion_from_matrix(numpy.identity(4), True) + >>> numpy.allclose(q, [1, 0, 0, 0]) + True + >>> q = quaternion_from_matrix(numpy.diag([1, -1, -1, 1])) + >>> numpy.allclose(q, [0, 1, 0, 0]) or numpy.allclose(q, [0, -1, 0, 0]) + True + >>> R = rotation_matrix(0.123, (1, 2, 3)) + >>> q = quaternion_from_matrix(R, True) + >>> numpy.allclose(q, [0.9981095, 0.0164262, 0.0328524, 0.0492786]) + True + >>> R = [[-0.545, 0.797, 0.260, 0], [0.733, 0.603, -0.313, 0], + ... [-0.407, 0.021, -0.913, 0], [0, 0, 0, 1]] + >>> q = quaternion_from_matrix(R) + >>> numpy.allclose(q, [0.19069, 0.43736, 0.87485, -0.083611]) + True + >>> R = [[0.395, 0.362, 0.843, 0], [-0.626, 0.796, -0.056, 0], + ... [-0.677, -0.498, 0.529, 0], [0, 0, 0, 1]] + >>> q = quaternion_from_matrix(R) + >>> numpy.allclose(q, [0.82336615, -0.13610694, 0.46344705, -0.29792603]) + True + >>> R = random_rotation_matrix() + >>> q = quaternion_from_matrix(R) + >>> is_same_transform(R, quaternion_matrix(q)) + True + + """ + M = numpy.array(matrix, dtype=numpy.float64, copy=False)[:4, :4] + if isprecise: + q = numpy.empty((4,)) + t = numpy.trace(M) + if t > M[3, 3]: + q[0] = t + q[3] = M[1, 0] - M[0, 1] + q[2] = M[0, 2] - M[2, 0] + q[1] = M[2, 1] - M[1, 2] + else: + i, j, k = 1, 2, 3 + if M[1, 1] > M[0, 0]: + i, j, k = 2, 3, 1 + if M[2, 2] > M[i, i]: + i, j, k = 3, 1, 2 + t = M[i, i] - (M[j, j] + M[k, k]) + M[3, 3] + q[i] = t + q[j] = M[i, j] + M[j, i] + q[k] = M[k, i] + M[i, k] + q[3] = M[k, j] - M[j, k] + q *= 0.5 / math.sqrt(t * M[3, 3]) + else: + m00 = M[0, 0] + m01 = M[0, 1] + m02 = M[0, 2] + m10 = M[1, 0] + m11 = M[1, 1] + m12 = M[1, 2] + m20 = M[2, 0] + m21 = M[2, 1] + m22 = M[2, 2] + # symmetric matrix K + K = numpy.array([[m00 - m11 - m22, 0.0, 0.0, 0.0], + [m01 + m10, m11 - m00 - m22, 0.0, 0.0], + [m02 + m20, m12 + m21, m22 - m00 - m11, 0.0], + [m21 - m12, m02 - m20, m10 - m01, m00 + m11 + m22]]) + K /= 3.0 + # quaternion is eigenvector of K that corresponds to largest eigenvalue + w, V = numpy.linalg.eigh(K) + q = V[[3, 0, 1, 2], numpy.argmax(w)] + if q[0] < 0.0: + numpy.negative(q, q) + return q + + +def quaternion_multiply(quaternion1, quaternion0): + """Return multiplication of two quaternions. 
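+
+    Quaternions are in [w, x, y, z] order. The result is the Hamilton product
+    quaternion1 * quaternion0 and represents the rotation of quaternion0
+    followed by the rotation of quaternion1, i.e. it matches
+    numpy.dot(quaternion_matrix(quaternion1), quaternion_matrix(quaternion0)).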
+ + >>> q = quaternion_multiply([4, 1, -2, 3], [8, -5, 6, 7]) + >>> numpy.allclose(q, [28, -44, -14, 48]) + True + + """ + w0, x0, y0, z0 = quaternion0 + w1, x1, y1, z1 = quaternion1 + return numpy.array([-x1 * x0 - y1 * y0 - z1 * z0 + w1 * w0, + x1 * w0 + y1 * z0 - z1 * y0 + w1 * x0, + -x1 * z0 + y1 * w0 + z1 * x0 + w1 * y0, + x1 * y0 - y1 * x0 + z1 * w0 + w1 * z0], dtype=numpy.float64) + + +def quaternion_conjugate(quaternion): + """Return conjugate of quaternion. + + >>> q0 = random_quaternion() + >>> q1 = quaternion_conjugate(q0) + >>> q1[0] == q0[0] and all(q1[1:] == -q0[1:]) + True + + """ + q = numpy.array(quaternion, dtype=numpy.float64, copy=True) + numpy.negative(q[1:], q[1:]) + return q + + +def quaternion_inverse(quaternion): + """Return inverse of quaternion. + + >>> q0 = random_quaternion() + >>> q1 = quaternion_inverse(q0) + >>> numpy.allclose(quaternion_multiply(q0, q1), [1, 0, 0, 0]) + True + + """ + q = numpy.array(quaternion, dtype=numpy.float64, copy=True) + numpy.negative(q[1:], q[1:]) + return q / numpy.dot(q, q) + + +def quaternion_real(quaternion): + """Return real part of quaternion. + + >>> quaternion_real([3, 0, 1, 2]) + 3.0 + + """ + return float(quaternion[0]) + + +def quaternion_imag(quaternion): + """Return imaginary part of quaternion. + + >>> quaternion_imag([3, 0, 1, 2]) + array([ 0., 1., 2.]) + + """ + return numpy.array(quaternion[1:4], dtype=numpy.float64, copy=True) + + +def quaternion_slerp(quat0, quat1, fraction, spin=0, shortestpath=True): + """Return spherical linear interpolation between two quaternions. + + >>> q0 = random_quaternion() + >>> q1 = random_quaternion() + >>> q = quaternion_slerp(q0, q1, 0) + >>> numpy.allclose(q, q0) + True + >>> q = quaternion_slerp(q0, q1, 1, 1) + >>> numpy.allclose(q, q1) + True + >>> q = quaternion_slerp(q0, q1, 0.5) + >>> angle = math.acos(numpy.dot(q0, q)) + >>> numpy.allclose(2, math.acos(numpy.dot(q0, q1)) / angle) or \ + numpy.allclose(2, math.acos(-numpy.dot(q0, q1)) / angle) + True + + """ + q0 = unit_vector(quat0[:4]) + q1 = unit_vector(quat1[:4]) + if fraction == 0.0: + return q0 + elif fraction == 1.0: + return q1 + d = numpy.dot(q0, q1) + if abs(abs(d) - 1.0) < _EPS: + return q0 + if shortestpath and d < 0.0: + # invert rotation + d = -d + numpy.negative(q1, q1) + angle = math.acos(d) + spin * math.pi + if abs(angle) < _EPS: + return q0 + isin = 1.0 / math.sin(angle) + q0 *= math.sin((1.0 - fraction) * angle) * isin + q1 *= math.sin(fraction * angle) * isin + q0 += q1 + return q0 + + +def random_quaternion(rand=None): + """Return uniform random unit quaternion. + + rand: array like or None + Three independent random variables that are uniformly distributed + between 0 and 1. + + >>> q = random_quaternion() + >>> numpy.allclose(1, vector_norm(q)) + True + >>> q = random_quaternion(numpy.random.random(3)) + >>> len(q.shape), q.shape[0]==4 + (1, True) + + """ + if rand is None: + rand = numpy.random.rand(3) + else: + assert len(rand) == 3 + r1 = numpy.sqrt(1.0 - rand[0]) + r2 = numpy.sqrt(rand[0]) + pi2 = math.pi * 2.0 + t1 = pi2 * rand[1] + t2 = pi2 * rand[2] + return numpy.array([numpy.cos(t2) * r2, numpy.sin(t1) * r1, + numpy.cos(t1) * r1, numpy.sin(t2) * r2]) + + +def random_rotation_matrix(rand=None): + """Return uniform random rotation matrix. + + rand: array like + Three independent random variables that are uniformly distributed + between 0 and 1 for each returned quaternion. 
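+
+    The matrix is computed by drawing a uniform random unit quaternion with
+    random_quaternion() and converting it with quaternion_matrix(), so the
+    returned rotations are uniformly distributed over the rotation group.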
+ + >>> R = random_rotation_matrix() + >>> numpy.allclose(numpy.dot(R.T, R), numpy.identity(4)) + True + + """ + return quaternion_matrix(random_quaternion(rand)) + + +class Arcball(object): + """Virtual Trackball Control. + + >>> ball = Arcball() + >>> ball = Arcball(initial=numpy.identity(4)) + >>> ball.place([320, 320], 320) + >>> ball.down([500, 250]) + >>> ball.drag([475, 275]) + >>> R = ball.matrix() + >>> numpy.allclose(numpy.sum(R), 3.90583455) + True + >>> ball = Arcball(initial=[1, 0, 0, 0]) + >>> ball.place([320, 320], 320) + >>> ball.setaxes([1, 1, 0], [-1, 1, 0]) + >>> ball.constrain = True + >>> ball.down([400, 200]) + >>> ball.drag([200, 400]) + >>> R = ball.matrix() + >>> numpy.allclose(numpy.sum(R), 0.2055924) + True + >>> ball.next() + + """ + + def __init__(self, initial=None): + """Initialize virtual trackball control. + + initial : quaternion or rotation matrix + + """ + self._axis = None + self._axes = None + self._radius = 1.0 + self._center = [0.0, 0.0] + self._vdown = numpy.array([0.0, 0.0, 1.0]) + self._constrain = False + if initial is None: + self._qdown = numpy.array([1.0, 0.0, 0.0, 0.0]) + else: + initial = numpy.array(initial, dtype=numpy.float64) + if initial.shape == (4, 4): + self._qdown = quaternion_from_matrix(initial) + elif initial.shape == (4,): + initial /= vector_norm(initial) + self._qdown = initial + else: + raise ValueError("initial not a quaternion or matrix") + self._qnow = self._qpre = self._qdown + + def place(self, center, radius): + """Place Arcball, e.g. when window size changes. + + center : sequence[2] + Window coordinates of trackball center. + radius : float + Radius of trackball in window coordinates. + + """ + self._radius = float(radius) + self._center[0] = center[0] + self._center[1] = center[1] + + def setaxes(self, *axes): + """Set axes to constrain rotations.""" + if axes is None: + self._axes = None + else: + self._axes = [unit_vector(axis) for axis in axes] + + @property + def constrain(self): + """Return state of constrain to axis mode.""" + return self._constrain + + @constrain.setter + def constrain(self, value): + """Set state of constrain to axis mode.""" + self._constrain = bool(value) + + def down(self, point): + """Set initial cursor window coordinates and pick constrain-axis.""" + self._vdown = arcball_map_to_sphere(point, self._center, self._radius) + self._qdown = self._qpre = self._qnow + if self._constrain and self._axes is not None: + self._axis = arcball_nearest_axis(self._vdown, self._axes) + self._vdown = arcball_constrain_to_axis(self._vdown, self._axis) + else: + self._axis = None + + def drag(self, point): + """Update current cursor window coordinates.""" + vnow = arcball_map_to_sphere(point, self._center, self._radius) + if self._axis is not None: + vnow = arcball_constrain_to_axis(vnow, self._axis) + self._qpre = self._qnow + t = numpy.cross(self._vdown, vnow) + if numpy.dot(t, t) < _EPS: + self._qnow = self._qdown + else: + q = [numpy.dot(self._vdown, vnow), t[0], t[1], t[2]] + self._qnow = quaternion_multiply(q, self._qdown) + + def next(self, acceleration=0.0): + """Continue rotation in direction of last drag.""" + q = quaternion_slerp(self._qpre, self._qnow, 2.0 + acceleration, False) + self._qpre, self._qnow = self._qnow, q + + def matrix(self): + """Return homogeneous rotation matrix.""" + return quaternion_matrix(self._qnow) + + +def arcball_map_to_sphere(point, center, radius): + """Return unit sphere coordinates from window coordinates.""" + v0 = (point[0] - center[0]) / radius + v1 = (center[1] 
- point[1]) / radius + n = v0 * v0 + v1 * v1 + if n > 1.0: + # position outside of sphere + n = math.sqrt(n) + return numpy.array([v0 / n, v1 / n, 0.0]) + else: + return numpy.array([v0, v1, math.sqrt(1.0 - n)]) + + +def arcball_constrain_to_axis(point, axis): + """Return sphere point perpendicular to axis.""" + v = numpy.array(point, dtype=numpy.float64, copy=True) + a = numpy.array(axis, dtype=numpy.float64, copy=True) + v -= a * numpy.dot(a, v) # on plane + n = vector_norm(v) + if n > _EPS: + if v[2] < 0.0: + numpy.negative(v, v) + v /= n + return v + if a[2] == 1.0: + return numpy.array([1.0, 0.0, 0.0]) + return unit_vector([-a[1], a[0], 0.0]) + + +def arcball_nearest_axis(point, axes): + """Return axis, which arc is nearest to point.""" + point = numpy.array(point, dtype=numpy.float64, copy=False) + nearest = None + mx = -1.0 + for axis in axes: + t = numpy.dot(arcball_constrain_to_axis(point, axis), point) + if t > mx: + nearest = axis + mx = t + return nearest + + +# epsilon for testing whether a number is close to zero +_EPS = numpy.finfo(float).eps * 4.0 + +# axis sequences for Euler angles +_NEXT_AXIS = [1, 2, 0, 1] + +# map axes strings to/from tuples of inner axis, parity, repetition, frame +_AXES2TUPLE = { + 'sxyz': (0, 0, 0, 0), 'sxyx': (0, 0, 1, 0), 'sxzy': (0, 1, 0, 0), + 'sxzx': (0, 1, 1, 0), 'syzx': (1, 0, 0, 0), 'syzy': (1, 0, 1, 0), + 'syxz': (1, 1, 0, 0), 'syxy': (1, 1, 1, 0), 'szxy': (2, 0, 0, 0), + 'szxz': (2, 0, 1, 0), 'szyx': (2, 1, 0, 0), 'szyz': (2, 1, 1, 0), + 'rzyx': (0, 0, 0, 1), 'rxyx': (0, 0, 1, 1), 'ryzx': (0, 1, 0, 1), + 'rxzx': (0, 1, 1, 1), 'rxzy': (1, 0, 0, 1), 'ryzy': (1, 0, 1, 1), + 'rzxy': (1, 1, 0, 1), 'ryxy': (1, 1, 1, 1), 'ryxz': (2, 0, 0, 1), + 'rzxz': (2, 0, 1, 1), 'rxyz': (2, 1, 0, 1), 'rzyz': (2, 1, 1, 1)} + +_TUPLE2AXES = dict((v, k) for k, v in _AXES2TUPLE.items()) + + +def vector_norm(data, axis=None, out=None): + """Return length, i.e. Euclidean norm, of ndarray along axis. + + >>> v = numpy.random.random(3) + >>> n = vector_norm(v) + >>> numpy.allclose(n, numpy.linalg.norm(v)) + True + >>> v = numpy.random.rand(6, 5, 3) + >>> n = vector_norm(v, axis=-1) + >>> numpy.allclose(n, numpy.sqrt(numpy.sum(v*v, axis=2))) + True + >>> n = vector_norm(v, axis=1) + >>> numpy.allclose(n, numpy.sqrt(numpy.sum(v*v, axis=1))) + True + >>> v = numpy.random.rand(5, 4, 3) + >>> n = numpy.empty((5, 3)) + >>> vector_norm(v, axis=1, out=n) + >>> numpy.allclose(n, numpy.sqrt(numpy.sum(v*v, axis=1))) + True + >>> vector_norm([]) + 0.0 + >>> vector_norm([1]) + 1.0 + + """ + data = numpy.array(data, dtype=numpy.float64, copy=True) + if out is None: + if data.ndim == 1: + return math.sqrt(numpy.dot(data, data)) + data *= data + out = numpy.atleast_1d(numpy.sum(data, axis=axis)) + numpy.sqrt(out, out) + return out + else: + data *= data + numpy.sum(data, axis=axis, out=out) + numpy.sqrt(out, out) + + +def unit_vector(data, axis=None, out=None): + """Return ndarray normalized by length, i.e. Euclidean norm, along axis. 
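+
+    If out is provided, the normalized data is written into it and None is
+    returned; otherwise a new normalized array is returned.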
+ + >>> v0 = numpy.random.random(3) + >>> v1 = unit_vector(v0) + >>> numpy.allclose(v1, v0 / numpy.linalg.norm(v0)) + True + >>> v0 = numpy.random.rand(5, 4, 3) + >>> v1 = unit_vector(v0, axis=-1) + >>> v2 = v0 / numpy.expand_dims(numpy.sqrt(numpy.sum(v0*v0, axis=2)), 2) + >>> numpy.allclose(v1, v2) + True + >>> v1 = unit_vector(v0, axis=1) + >>> v2 = v0 / numpy.expand_dims(numpy.sqrt(numpy.sum(v0*v0, axis=1)), 1) + >>> numpy.allclose(v1, v2) + True + >>> v1 = numpy.empty((5, 4, 3)) + >>> unit_vector(v0, axis=1, out=v1) + >>> numpy.allclose(v1, v2) + True + >>> list(unit_vector([])) + [] + >>> list(unit_vector([1])) + [1.0] + + """ + if out is None: + data = numpy.array(data, dtype=numpy.float64, copy=True) + if data.ndim == 1: + data /= math.sqrt(numpy.dot(data, data)) + return data + else: + if out is not data: + out[:] = numpy.array(data, copy=False) + data = out + length = numpy.atleast_1d(numpy.sum(data * data, axis)) + numpy.sqrt(length, length) + if axis is not None: + length = numpy.expand_dims(length, axis) + data /= length + if out is None: + return data + + +def random_vector(size): + """Return array of random doubles in the half-open interval [0.0, 1.0). + + >>> v = random_vector(10000) + >>> numpy.all(v >= 0) and numpy.all(v < 1) + True + >>> v0 = random_vector(10) + >>> v1 = random_vector(10) + >>> numpy.any(v0 == v1) + False + + """ + return numpy.random.random(size) + + +def vector_product(v0, v1, axis=0): + """Return vector perpendicular to vectors. + + >>> v = vector_product([2, 0, 0], [0, 3, 0]) + >>> numpy.allclose(v, [0, 0, 6]) + True + >>> v0 = [[2, 0, 0, 2], [0, 2, 0, 2], [0, 0, 2, 2]] + >>> v1 = [[3], [0], [0]] + >>> v = vector_product(v0, v1) + >>> numpy.allclose(v, [[0, 0, 0, 0], [0, 0, 6, 6], [0, -6, 0, -6]]) + True + >>> v0 = [[2, 0, 0], [2, 0, 0], [0, 2, 0], [2, 0, 0]] + >>> v1 = [[0, 3, 0], [0, 0, 3], [0, 0, 3], [3, 3, 3]] + >>> v = vector_product(v0, v1, axis=1) + >>> numpy.allclose(v, [[0, 0, 6], [0, -6, 0], [6, 0, 0], [0, -6, 6]]) + True + + """ + return numpy.cross(v0, v1, axis=axis) + + +def angle_between_vectors(v0, v1, directed=True, axis=0): + """Return angle between vectors. + + If directed is False, the input vectors are interpreted as undirected axes, + i.e. the maximum angle is pi/2. + + >>> a = angle_between_vectors([1, -2, 3], [-1, 2, -3]) + >>> numpy.allclose(a, math.pi) + True + >>> a = angle_between_vectors([1, -2, 3], [-1, 2, -3], directed=False) + >>> numpy.allclose(a, 0) + True + >>> v0 = [[2, 0, 0, 2], [0, 2, 0, 2], [0, 0, 2, 2]] + >>> v1 = [[3], [0], [0]] + >>> a = angle_between_vectors(v0, v1) + >>> numpy.allclose(a, [0, 1.5708, 1.5708, 0.95532]) + True + >>> v0 = [[2, 0, 0], [2, 0, 0], [0, 2, 0], [2, 0, 0]] + >>> v1 = [[0, 3, 0], [0, 0, 3], [0, 0, 3], [3, 3, 3]] + >>> a = angle_between_vectors(v0, v1, axis=1) + >>> numpy.allclose(a, [1.5708, 1.5708, 1.5708, 0.95532]) + True + + """ + v0 = numpy.array(v0, dtype=numpy.float64, copy=False) + v1 = numpy.array(v1, dtype=numpy.float64, copy=False) + dot = numpy.sum(v0 * v1, axis=axis) + dot /= vector_norm(v0, axis=axis) * vector_norm(v1, axis=axis) + return numpy.arccos(dot if directed else numpy.fabs(dot)) + + +def inverse_matrix(matrix): + """Return inverse of square transformation matrix. + + >>> M0 = random_rotation_matrix() + >>> M1 = inverse_matrix(M0.T) + >>> numpy.allclose(M1, numpy.linalg.inv(M0.T)) + True + >>> for size in range(1, 7): + ... M0 = numpy.random.rand(size, size) + ... M1 = inverse_matrix(M0) + ... 
if not numpy.allclose(M1, numpy.linalg.inv(M0)): print(size) + + """ + return numpy.linalg.inv(matrix) + + +def concatenate_matrices(*matrices): + """Return concatenation of series of transformation matrices. + + >>> M = numpy.random.rand(16).reshape((4, 4)) - 0.5 + >>> numpy.allclose(M, concatenate_matrices(M)) + True + >>> numpy.allclose(numpy.dot(M, M.T), concatenate_matrices(M, M.T)) + True + + """ + M = numpy.identity(4) + for i in matrices: + M = numpy.dot(M, i) + return M + + +def is_same_transform(matrix0, matrix1): + """Return True if two matrices perform same transformation. + + >>> is_same_transform(numpy.identity(4), numpy.identity(4)) + True + >>> is_same_transform(numpy.identity(4), random_rotation_matrix()) + False + + """ + matrix0 = numpy.array(matrix0, dtype=numpy.float64, copy=True) + matrix0 /= matrix0[3, 3] + matrix1 = numpy.array(matrix1, dtype=numpy.float64, copy=True) + matrix1 /= matrix1[3, 3] + return numpy.allclose(matrix0, matrix1) + + +def _import_module(name, package=None, warn=True, prefix='_py_', ignore='_'): + """Try import all public attributes from module into global namespace. + + Existing attributes with name clashes are renamed with prefix. + Attributes starting with underscore are ignored by default. + + Return True on successful import. + + """ + import warnings + from importlib import import_module + try: + if not package: + module = import_module(name) + else: + module = import_module('.' + name, package=package) + except ImportError: + if warn: + warnings.warn("failed to import module %s" % name) + else: + for attr in dir(module): + if ignore and attr.startswith(ignore): + continue + if prefix: + if attr in globals(): + globals()[prefix + attr] = globals()[attr] + elif warn: + warnings.warn("no Python implementation of " + attr) + globals()[attr] = getattr(module, attr) + return True + + +# _import_module('_transformations') + +if __name__ == "__main__": + import doctest + import random # used in doctests + + numpy.set_printoptions(suppress=True, precision=5) + doctest.testmod() diff --git a/bop_toolkit_lib/view_sampler.py b/bop_toolkit_lib/view_sampler.py new file mode 100644 index 0000000..441d888 --- /dev/null +++ b/bop_toolkit_lib/view_sampler.py @@ -0,0 +1,291 @@ +# Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) +# Center for Machine Perception, Czech Technical University in Prague + +"""Sampling of views from a sphere.""" + +import math +import numpy as np + +from bop_toolkit_lib import transform +from bop_toolkit_lib import inout + + +def fibonacci_sampling(n_pts, radius=1.0): + """Samples an odd number of almost equidistant 3D points from the Fibonacci + lattice on a unit sphere. + + Latitude (elevation) represents the rotation angle around the X axis. + Longitude (azimuth) represents the rotation angle around the Z axis. + + Ref: + [1] https://arxiv.org/pdf/0912.4540.pdf + [2] http://stackoverflow.com/questions/34302938/map-point-to-closest-point-on-fibonacci-lattice + [3] http://stackoverflow.com/questions/9600801/evenly-distributing-n-points-on-a-sphere + [4] https://www.openprocessing.org/sketch/41142 + + :param n_pts: Number of 3D points to sample (an odd number). + :param radius: Radius of the sphere. + :return: List of 3D points on the sphere surface. + """ + # Needs to be an odd number [1]. + assert (n_pts % 2 == 1) + n_pts_half = int(n_pts / 2) + + phi = (math.sqrt(5.0) + 1.0) / 2.0 # Golden ratio. + phi_inv = phi - 1.0 + ga = 2.0 * math.pi * phi_inv # Complement to the golden angle. 
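+  # Stepping the longitude by ga between consecutive samples spreads the
+  # points evenly in azimuth (equivalent to stepping by the golden angle,
+  # just in the opposite direction), while the latitudes below are chosen
+  # so that sin(lat) is uniformly spaced, i.e. the samples are evenly
+  # distributed along the Z axis (equal-area bands on the sphere).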
+ + pts = [] + for i in range(-n_pts_half, n_pts_half + 1): + lat = math.asin((2 * i) / float(2 * n_pts_half + 1)) + lon = (ga * i) % (2 * math.pi) + + # Convert the latitude and longitude angles to 3D coordinates. + s = math.cos(lat) * radius + x, y, z = math.cos(lon) * s, math.sin(lon) * s, math.tan(lat) * s + pts.append([x, y, z]) + + # Calculate rotation matrix and translation vector. + # Note: lat,lon=0,0 is a camera looking to the sphere center from + # (-radius, 0, 0) in the world (i.e. sphere) coordinate system. + # pi_half = 0.5 * math.pi + # alpha_x = -lat - pi_half + # alpha_z = lon + pi_half + # R_x = transform.rotation_matrix(alpha_x, [1, 0, 0])[:3, :3] + # R_z = transform.rotation_matrix(alpha_z, [0, 0, 1])[:3, :3] + # R = np.linalg.inv(R_z.dot(R_x)) + # t = -R.dot(np.array([x, y, z]).reshape((3, 1))) + + return pts + + +def hinter_sampling(min_n_pts, radius=1.0): + """Samples 3D points on a sphere surface by refining an icosahedron, as in: + Hinterstoisser et al., Simultaneous Recognition and Homography Extraction of + Local Patches with a Simple Linear Classifier, BMVC 2008 + + :param min_n_pts: The minimum number of points to sample on the whole sphere. + :param radius: Radius of the sphere. + :return: 3D points on the sphere surface and a list with indices of refinement + levels on which the points were created. + """ + # Vertices and faces of an icosahedron. + a, b, c = 0.0, 1.0, (1.0 + math.sqrt(5.0)) / 2.0 + pts = [(-b, c, a), (b, c, a), (-b, -c, a), (b, -c, a), (a, -b, c), (a, b, c), + (a, -b, -c), (a, b, -c), (c, a, -b), (c, a, b), (-c, a, -b), + (-c, a, b)] + faces = [(0, 11, 5), (0, 5, 1), (0, 1, 7), (0, 7, 10), (0, 10, 11), (1, 5, 9), + (5, 11, 4), (11, 10, 2), (10, 7, 6), (7, 1, 8), (3, 9, 4), (3, 4, 2), + (3, 2, 6), (3, 6, 8), (3, 8, 9), (4, 9, 5), (2, 4, 11), (6, 2, 10), + (8, 6, 7), (9, 8, 1)] + + # Refinement levels on which the points were created. + pts_level = [0 for _ in range(len(pts))] + + ref_level = 0 + while len(pts) < min_n_pts: + ref_level += 1 + edge_pt_map = {} # Mapping from an edge to a newly added point on the edge. + faces_new = [] # New set of faces. + + # Each face is replaced by four new smaller faces. + for face in faces: + pt_inds = list(face) # List of point ID's involved in the new faces. + for i in range(3): + + # Add a new point if this edge has not been processed yet, or get ID of + # the already added point. + edge = (face[i], face[(i + 1) % 3]) + edge = (min(edge), max(edge)) + if edge not in edge_pt_map.keys(): + pt_new_id = len(pts) + edge_pt_map[edge] = pt_new_id + pt_inds.append(pt_new_id) + + pt_new = 0.5 * (np.array(pts[edge[0]]) + np.array(pts[edge[1]])) + pts.append(pt_new.tolist()) + pts_level.append(ref_level) + else: + pt_inds.append(edge_pt_map[edge]) + + # Replace the current face with four new faces. + faces_new += [(pt_inds[0], pt_inds[3], pt_inds[5]), + (pt_inds[3], pt_inds[1], pt_inds[4]), + (pt_inds[3], pt_inds[4], pt_inds[5]), + (pt_inds[5], pt_inds[4], pt_inds[2])] + faces = faces_new + + # Project the points to a sphere. + pts = np.array(pts) + pts *= np.reshape(radius / np.linalg.norm(pts, axis=1), (pts.shape[0], 1)) + + # Collect point connections. + pt_conns = {} + for face in faces: + for i in range(len(face)): + pt_conns.setdefault(face[i], set()).add(face[(i + 1) % len(face)]) + pt_conns[face[i]].add(face[(i + 2) % len(face)]) + + # Order the points - starting from the top one and adding the connected points + # sorted by azimuth. 
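+  # The loop below is a breadth-first traversal of the mesh connectivity:
+  # the frontier of not yet visited neighbors is sorted by azimuth before
+  # its points are appended to the ordered list.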
+ top_pt_id = np.argmax(pts[:, 2]) + pts_ordered = [] + pts_todo = [top_pt_id] + pts_done = [False for _ in range(pts.shape[0])] + + def calc_azimuth(x, y): + two_pi = 2.0 * math.pi + return (math.atan2(y, x) + two_pi) % two_pi + + while len(pts_ordered) != pts.shape[0]: + # Sort by azimuth. + pts_todo = sorted( + pts_todo, key=lambda i: calc_azimuth(pts[i][0], pts[i][1])) + pts_todo_new = [] + for pt_id in pts_todo: + pts_ordered.append(pt_id) + pts_done[pt_id] = True + pts_todo_new += [i for i in pt_conns[pt_id]] # Find the connected points. + + # Points to be processed in the next iteration. + pts_todo = [i for i in set(pts_todo_new) if not pts_done[i]] + + # Re-order the points and faces. + pts = pts[np.array(pts_ordered), :] + pts_level = [pts_level[i] for i in pts_ordered] + pts_order = np.zeros((pts.shape[0],)) + pts_order[np.array(pts_ordered)] = np.arange(pts.shape[0]) + for face_id in range(len(faces)): + faces[face_id] = [pts_order[i] for i in faces[face_id]] + + # import inout + # inout.save_ply('output/hinter_sampling.ply', pts=pts, faces=np.array(faces)) + + return pts, pts_level + + +def sample_views( + min_n_views, radius=1.0, azimuth_range=(0, 2 * math.pi), + elev_range=(-0.5 * math.pi, 0.5 * math.pi), mode='hinterstoisser'): + """Viewpoint sampling from a view sphere. + + :param min_n_views: The min. number of points to sample on the whole sphere. + :param radius: Radius of the sphere. + :param azimuth_range: Azimuth range from which the viewpoints are sampled. + :param elev_range: Elevation range from which the viewpoints are sampled. + :param mode: Type of sampling (options: 'hinterstoisser' or 'fibonacci'). + :return: List of views, each represented by a 3x3 ndarray with a rotation + matrix and a 3x1 ndarray with a translation vector. + """ + # Get points on a sphere. + if mode == 'hinterstoisser': + pts, pts_level = hinter_sampling(min_n_views, radius=radius) + elif mode == 'fibonacci': + n_views = min_n_views + if n_views % 2 != 1: + n_views += 1 + + pts = fibonacci_sampling(n_views, radius=radius) + pts_level = [0 for _ in range(len(pts))] + else: + raise ValueError('Unknown view sampling mode.') + + views = [] + for pt in pts: + # Azimuth from (0, 2 * pi). + azimuth = math.atan2(pt[1], pt[0]) + if azimuth < 0: + azimuth += 2.0 * math.pi + + # Elevation from (-0.5 * pi, 0.5 * pi). + a = np.linalg.norm(pt) + b = np.linalg.norm([pt[0], pt[1], 0]) + elev = math.acos(b / a) + if pt[2] < 0: + elev = -elev + + if not (azimuth_range[0] <= azimuth <= azimuth_range[1] and + elev_range[0] <= elev <= elev_range[1]): + continue + + # Rotation matrix. + # Adopted from gluLookAt function (uses OpenGL coordinate system): + # [1] http://stackoverflow.com/questions/5717654/glulookat-explanation + # [2] https://www.opengl.org/wiki/GluLookAt_code + f = -np.array(pt) # Forward direction. + f /= np.linalg.norm(f) + u = np.array([0.0, 0.0, 1.0]) # Up direction. + s = np.cross(f, u) # Side direction. + if np.count_nonzero(s) == 0: + # f and u are parallel, i.e. we are looking along or against Z axis. + s = np.array([1.0, 0.0, 0.0]) + s /= np.linalg.norm(s) + u = np.cross(s, f) # Recompute up. + R = np.array([[s[0], s[1], s[2]], + [u[0], u[1], u[2]], + [-f[0], -f[1], -f[2]]]) + + # Convert from OpenGL to OpenCV coordinate system. + R_yz_flip = transform.rotation_matrix(math.pi, [1, 0, 0])[:3, :3] + R = R_yz_flip.dot(R) + + # Translation vector. 
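+    # The sampled point pt is the camera center in the world (sphere) frame;
+    # given the world-to-camera rotation R, the translation is t = -R * pt.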
+ t = -R.dot(np.array(pt).reshape((3, 1))) + + views.append({'R': R, 't': t}) + + return views, pts_level + + +def save_vis(path, views, views_level=None): + """ + Creates a PLY file visualizing the views. + + :param path: Path to output PLY file. + :param views: Views as returned by sample_views(). + :param views_level: View levels as returned by sample_views(). + """ + pts = [] + normals = [] + colors = [] + for view_id, view in enumerate(views): + R_inv = np.linalg.inv(view['R']) + pts += [R_inv.dot(-view['t']).squeeze(), + R_inv.dot(np.array([[0.01, 0, 0]]).T - view['t']).squeeze(), + R_inv.dot(np.array([[0, 0.01, 0]]).T - view['t']).squeeze(), + R_inv.dot(np.array([[0, 0, 0.01]]).T - view['t']).squeeze()] + + normal = R_inv.dot(np.array([0, 0, 1]).reshape((3, 1))) + normals += [normal.squeeze(), + np.array([0, 0, 0]), + np.array([0, 0, 0]), + np.array([0, 0, 0])] + + if views_level: + max_level = max(1, max(views_level)) + intens = (255 * views_level[view_id]) / float(max_level) + else: + intens = 255 * view_id / float(len(views)) + colors += [[intens, intens, intens], + [255, 0, 0], + [0, 255, 0], + [0, 0, 255]] + + inout.save_ply2(path, + pts=np.array(pts), + pts_normals=np.array(normals), + pts_colors=np.array(colors)) + + +if __name__ == '__main__': + + # Example of sampling views from a view sphere. + views, views_level = sample_views( + min_n_views=25, + radius=1, + azimuth_range=(0, 2 * math.pi), + elev_range=(-0.5 * math.pi, 0.5 * math.pi), + mode='fibonacci') + misc.log('Sampled views: ' + str(len(views))) + out_views_vis_path = '../output/view_sphere.ply' + save_vis(out_views_vis_path, views) diff --git a/bop_toolkit_lib/visibility.py b/bop_toolkit_lib/visibility.py new file mode 100644 index 0000000..b90a8ae --- /dev/null +++ b/bop_toolkit_lib/visibility.py @@ -0,0 +1,75 @@ +# Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) +# Center for Machine Perception, Czech Technical University in Prague + +"""Estimation of the visible object surface from depth images.""" + +import numpy as np + + +def _estimate_visib_mask(d_test, d_model, delta, visib_mode='bop19'): + """Estimates a mask of the visible object surface. + + :param d_test: Distance image of a scene in which the visibility is estimated. + :param d_model: Rendered distance image of the object model. + :param delta: Tolerance used in the visibility test. + :param visib_mode: Visibility mode: + 1) 'bop18' - Object is considered NOT VISIBLE at pixels with missing depth. + 2) 'bop19' - Object is considered VISIBLE at pixels with missing depth. This + allows to use the VSD pose error function also on shiny objects, which + are typically not captured well by the depth sensors. A possible problem + with this mode is that some invisible parts can be considered visible. + However, the shadows of missing depth measurements, where this problem is + expected to appear and which are often present at depth discontinuities, + are typically relatively narrow and therefore this problem is less + significant. + :return: Visibility mask. 
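+
+  For example, at a pixel with a missing depth measurement (d_test == 0) where
+  the model is rendered (d_model > 0), mode 'bop18' marks the pixel as not
+  visible while mode 'bop19' marks it as visible.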
+ """ + assert (d_test.shape == d_model.shape) + + if visib_mode == 'bop18': + mask_valid = np.logical_and(d_test > 0, d_model > 0) + d_diff = d_model.astype(np.float32) - d_test.astype(np.float32) + visib_mask = np.logical_and(d_diff <= delta, mask_valid) + + elif visib_mode == 'bop19': + d_diff = d_model.astype(np.float32) - d_test.astype(np.float32) + visib_mask = np.logical_and( + np.logical_or(d_diff <= delta, d_test == 0), d_model > 0) + + else: + raise ValueError('Unknown visibility mode.') + + return visib_mask + + +def estimate_visib_mask_gt(d_test, d_gt, delta, visib_mode='bop19'): + """Estimates a mask of the visible object surface in the ground-truth pose. + + :param d_test: Distance image of a scene in which the visibility is estimated. + :param d_gt: Rendered distance image of the object model in the GT pose. + :param delta: Tolerance used in the visibility test. + :param visib_mode: See _estimate_visib_mask. + :return: Visibility mask. + """ + visib_gt = _estimate_visib_mask(d_test, d_gt, delta, visib_mode) + return visib_gt + + +def estimate_visib_mask_est(d_test, d_est, visib_gt, delta, visib_mode='bop19'): + """Estimates a mask of the visible object surface in the estimated pose. + + For an explanation of why the visibility mask is calculated differently for + the estimated and the ground-truth pose, see equation (14) and related text in + Hodan et al., On Evaluation of 6D Object Pose Estimation, ECCVW'16. + + :param d_test: Distance image of a scene in which the visibility is estimated. + :param d_est: Rendered distance image of the object model in the est. pose. + :param visib_gt: Visibility mask of the object model in the GT pose (from + function estimate_visib_mask_gt). + :param delta: Tolerance used in the visibility test. + :param visib_mode: See _estimate_visib_mask. + :return: Visibility mask. + """ + visib_est = _estimate_visib_mask(d_test, d_est, delta, visib_mode) + visib_est = np.logical_or(visib_est, np.logical_and(visib_gt, d_est > 0)) + return visib_est diff --git a/bop_toolkit_lib/visualization.py b/bop_toolkit_lib/visualization.py new file mode 100644 index 0000000..beeb4d5 --- /dev/null +++ b/bop_toolkit_lib/visualization.py @@ -0,0 +1,222 @@ +# Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) +# Center for Machine Perception, Czech Technical University in Prague + +"""Visualization utilities.""" + +import os +import numpy as np +import matplotlib.pyplot as plt +from PIL import Image, ImageDraw, ImageFont + +from bop_toolkit_lib import inout +from bop_toolkit_lib import misc + + +def draw_rect(im, rect, color=(1.0, 1.0, 1.0)): + """Draws a rectangle on an image. + + :param im: ndarray (uint8) on which the rectangle will be drawn. + :param rect: Rectangle defined as [x, y, width, height], where [x, y] is the + top-left corner. + :param color: Color of the rectangle. + :return: Image with drawn rectangle. + """ + if im.dtype != np.uint8: + raise ValueError('The image must be of type uint8.') + + im_pil = Image.fromarray(im) + draw = ImageDraw.Draw(im_pil) + draw.rectangle((rect[0], rect[1], rect[0] + rect[2], rect[1] + rect[3]), + outline=tuple([int(c * 255) for c in color]), fill=None) + del draw + return np.asarray(im_pil) + + +def write_text_on_image(im, txt_list, loc=(3, 0), color=(1.0, 1.0, 1.0), + size=20): + """Writes text info on an image. + + :param im: ndarray on which the text info will be written. + :param txt_list: List of dictionaries, each describing one info line: + - 'name': Entry name. + - 'val': Entry value. 
+ - 'fmt': String format for the value. + :param loc: Location of the top left corner of the text box. + :param color: Font color. + :param size: Font size. + :return: Image with written text info. + """ + im_pil = Image.fromarray(im) + + # Load font. + try: + font_path = os.path.join(os.path.dirname(__file__), 'droid_sans_mono.ttf') + font = ImageFont.truetype(font_path, size) + except IOError: + misc.log('Warning: Loading a fallback font.') + font = ImageFont.load_default() + + draw = ImageDraw.Draw(im_pil) + for info in txt_list: + if info['name'] != '': + txt_tpl = '{}:{' + info['fmt'] + '}' + else: + txt_tpl = '{}{' + info['fmt'] + '}' + txt = txt_tpl.format(info['name'], info['val']) + draw.text(loc, txt, fill=tuple([int(c * 255) for c in color]), font=font) + text_width, text_height = font.getsize(txt) + loc = (loc[0], loc[1] + text_height) + del draw + + return np.array(im_pil) + + +def depth_for_vis(depth, valid_start=0.2, valid_end=1.0): + """Transforms depth values from the specified range to [0, 255]. + + :param depth: ndarray with a depth image (1 channel). + :param valid_start: The beginning of the depth range. + :param valid_end: The end of the depth range. + :return: Transformed depth image. + """ + mask = depth > 0 + depth_n = depth.astype(np.float) + depth_n[mask] -= depth_n[mask].min() + depth_n[mask] /= depth_n[mask].max() / (valid_end - valid_start) + depth_n[mask] += valid_start + return depth_n + + +def vis_object_poses( + poses, K, renderer, rgb=None, depth=None, vis_rgb_path=None, + vis_depth_diff_path=None, vis_rgb_resolve_visib=False): + """Visualizes 3D object models in specified poses in a single image. + + Two visualizations are created: + 1. An RGB visualization (if vis_rgb_path is not None). + 2. A Depth-difference visualization (if vis_depth_diff_path is not None). + + :param poses: List of dictionaries, each with info about one pose: + - 'obj_id': Object ID. + - 'R': 3x3 ndarray with a rotation matrix. + - 't': 3x1 ndarray with a translation vector. + - 'text_info': Info to write at the object (see write_text_on_image). + :param K: 3x3 ndarray with an intrinsic camera matrix. + :param renderer: Instance of the Renderer class (see renderer.py). + :param rgb: ndarray with the RGB input image. + :param depth: ndarray with the depth input image. + :param vis_rgb_path: Path to the output RGB visualization. + :param vis_depth_diff_path: Path to the output depth-difference visualization. + :param vis_rgb_resolve_visib: Whether to resolve visibility of the objects + (i.e. only the closest object is visualized at each pixel). + """ + fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2] + + # Indicators of visualization types. + vis_rgb = vis_rgb_path is not None + vis_depth_diff = vis_depth_diff_path is not None + + if vis_rgb and rgb is None: + raise ValueError('RGB visualization triggered but RGB image not provided.') + + if (vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib)) and depth is None: + raise ValueError('Depth visualization triggered but D image not provided.') + + # Prepare images for rendering. 
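+  # Buffers are allocated only for the requested outputs: an RGB canvas and a
+  # canvas for text/bounding-box overlays if the RGB visualization is needed,
+  # and a depth buffer if the depth differences are visualized or the
+  # visibility of the objects is resolved.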
+ im_size = None + ren_rgb = None + ren_rgb_info = None + ren_depth = None + + if vis_rgb: + im_size = (rgb.shape[1], rgb.shape[0]) + ren_rgb = np.zeros(rgb.shape, np.uint8) + ren_rgb_info = np.zeros(rgb.shape, np.uint8) + + if vis_depth_diff: + if im_size and im_size != (depth.shape[1], depth.shape[0]): + raise ValueError('The RGB and D images must have the same size.') + else: + im_size = (depth.shape[1], depth.shape[0]) + + if vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib): + ren_depth = np.zeros((im_size[1], im_size[0]), np.float32) + + # Render the pose estimates one by one. + for pose in poses: + + # Rendering. + ren_out = renderer.render_object( + pose['obj_id'], pose['R'], pose['t'], fx, fy, cx, cy) + + m_rgb = None + if vis_rgb: + m_rgb = ren_out['rgb'] + + m_mask = None + if vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib): + m_depth = ren_out['depth'] + + # Get mask of the surface parts that are closer than the + # surfaces rendered before. + visible_mask = np.logical_or(ren_depth == 0, m_depth < ren_depth) + m_mask = np.logical_and(m_depth != 0, visible_mask) + + ren_depth[m_mask] = m_depth[m_mask].astype(ren_depth.dtype) + + # Combine the RGB renderings. + if vis_rgb: + if vis_rgb_resolve_visib: + ren_rgb[m_mask] = m_rgb[m_mask].astype(ren_rgb.dtype) + else: + ren_rgb_f = ren_rgb.astype(np.float32) + m_rgb.astype(np.float32) + ren_rgb_f[ren_rgb_f > 255] = 255 + ren_rgb = ren_rgb_f.astype(np.uint8) + + # Draw 2D bounding box and write text info. + obj_mask = np.sum(m_rgb > 0, axis=2) + ys, xs = obj_mask.nonzero() + if len(ys): + # bbox_color = model_color + # text_color = model_color + bbox_color = (0.3, 0.3, 0.3) + text_color = (1.0, 1.0, 1.0) + text_size = 11 + + bbox = misc.calc_2d_bbox(xs, ys, im_size) + im_size = (obj_mask.shape[1], obj_mask.shape[0]) + ren_rgb_info = draw_rect(ren_rgb_info, bbox, bbox_color) + + if 'text_info' in pose: + text_loc = (bbox[0] + 2, bbox[1]) + ren_rgb_info = write_text_on_image( + ren_rgb_info, pose['text_info'], text_loc, color=text_color, + size=text_size) + + # Blend and save the RGB visualization. + if vis_rgb: + vis_im_rgb = 0.5 * rgb.astype(np.float32) + \ + 0.5 * ren_rgb.astype(np.float32) + \ + 1.0 * ren_rgb_info.astype(np.float32) + vis_im_rgb[vis_im_rgb > 255] = 255 + misc.ensure_dir(os.path.dirname(vis_rgb_path)) + inout.save_im(vis_rgb_path, vis_im_rgb.astype(np.uint8), jpg_quality=95) + + # Save the image of depth differences. + if vis_depth_diff: + # Calculate the depth difference at pixels where both depth maps + # are valid. + valid_mask = (depth > 0) * (ren_depth > 0) + depth_diff = valid_mask * (depth - ren_depth.astype(np.float32)) + + f, ax = plt.subplots(1, 1) + cax = ax.matshow(depth_diff) + ax.axis('off') + ax.set_title('captured - GT depth [mm]') + f.colorbar(cax, fraction=0.03, pad=0.01) + f.tight_layout(pad=0) + + if not vis_rgb: + misc.ensure_dir(os.path.dirname(vis_depth_diff_path)) + plt.savefig(vis_depth_diff_path, pad=0, bbox_inches='tight', quality=95) + plt.close() diff --git a/docs/bop_datasets_format.md b/docs/bop_datasets_format.md new file mode 100644 index 0000000..6c6fb78 --- /dev/null +++ b/docs/bop_datasets_format.md @@ -0,0 +1,167 @@ +# Format of the BOP datasets + +This file describes the common format of the BOP datasets [1]. + + +## Directory structure + +The datasets have the following structure: + + +* *models[\_MODELTYPE]* - 3D object models. 
+* *models[\_MODELTYPE]\_eval* - "Uniformly" resampled and decimated 3D object + models used for calculation of errors of object pose estimates. + + +* *train[\_TRAINTYPE]/X* (optional) - Training images of object X. +* *val[\_VALTYPE]/Y* (optional) - Validation images of scene Y. +* *test[\_TESTTYPE]/Y* - Test images of scene Y. + + +* *camera.json* - Camera parameters (for sensor simulation only; per-image + camera parameters are in files *scene_camera.json* - see below). +* *dataset_info.md* - Dataset-specific information. +* *test_targets_bop19.json* - A list of test targets used for the evaluation in + the BOP paper [1] and in the BOP Challenge 2019. + + +*MODELTYPE*, *TRAINTYPE*, *VALTYPE* and *TESTTYPE* are optional and used if more +data types are available (e.g. images from different sensors). + +The images in *train*, *val* and *test* folders are organized into subfolders: + +* *rgb/gray* - Color/gray images. +* *depth* - Depth images (saved as 16-bit unsigned short). +* *mask* (optional) - Masks of object silhouettes. +* *mask_visib* (optional) - Masks of the visible parts of object silhouettes. + +The corresponding images across the subolders have the same ID, e.g. +*rgb/000000.png* and *depth/000000.png* is the color and the depth image +of the same RGB-D frame. + + +## Training, validation and test images + +If both validation and test images are available for a dataset, the ground-truth +annotations are public only for the validation images. Performance scores for +test images with private ground-truth annotations can be calculated in the +[BOP evaluation system](http://bop.felk.cvut.cz). + +### Camera parameters + +Each set of images is accompanied with file *scene\_camera.json* which contains +the following information for each image: + +* *cam\_K* - 3x3 intrinsic camera matrix K (saved row-wise). +* *depth_scale* - Multiply the depth image with this factor to get depth in mm. +* *cam\_R\_w2c* (optional) - 3x3 rotation matrix R\_w2c (saved row-wise). +* *cam\_t\_w2c* (optional) - 3x1 translation vector t\_w2c. +* *view\_level* (optional) - Viewpoint subdivision level, see below. + +The matrix K may be different for each image. For example, the principal point +is not constant for images in T-LESS as the images were obtained by cropping a +region around the projection of the origin of the world coordinate system. + +Note that the intrinsic camera parameters can be found also in file +*camera.json* in the root folder of a dataset. These parameters are meant only +for simulation of the used sensor when rendering training images. + +P\_w2i = K * [R\_w2c, t\_w2c] is the camera matrix which transforms 3D point +p\_w = [x, y, z, 1]' in the world coordinate system to 2D point p\_i = +[u, v, 1]' in the image coordinate system: s * p\_i = P\_w2i * p\_w. + +### Ground-truth annotations + +The ground truth object poses are provided in files *scene_gt.json* which +contain the following information for each annotated object instance: + +* *obj\_id* - Object ID. +* *cam\_R\_m2c* - 3x3 rotation matrix R\_m2c (saved row-wise). +* *cam\_t\_m2c* - 3x1 translation vector t\_m2c. + +P\_m2i = K * [R\_m2c, t\_m2c] is the camera matrix which transforms 3D point +p\_m = [x, y, z, 1]' in the model coordinate system to 2D point p\_i = +[u, v, 1]' in the image coordinate system: s * p\_i = P\_m2i * p\_m. 
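+
+For illustration, the transformation can be evaluated with a few lines of
+Python (a minimal sketch using plain `json` and `numpy`; the file names, the
+image/instance indices and the point `p_m` below are only placeholders):
+
+```
+import json
+import numpy as np
+
+# Per-image camera parameters and GT poses (keys in the raw JSON are strings).
+with open('scene_camera.json', 'r') as f:
+  scene_camera = json.load(f)
+with open('scene_gt.json', 'r') as f:
+  scene_gt = json.load(f)
+
+im_id, gt_id = '0', 0  # An example image ID and GT annotation index.
+K = np.array(scene_camera[im_id]['cam_K']).reshape((3, 3))
+R = np.array(scene_gt[im_id][gt_id]['cam_R_m2c']).reshape((3, 3))
+t = np.array(scene_gt[im_id][gt_id]['cam_t_m2c']).reshape((3, 1))
+
+# A 3D point in the model coordinate system [mm].
+p_m = np.array([[10.0], [0.0], [0.0]])
+
+# s * p_i = K * (R_m2c * p_m + t_m2c).
+p_i = K.dot(R.dot(p_m) + t)
+u, v = p_i[0, 0] / p_i[2, 0], p_i[1, 0] / p_i[2, 0]
+```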
+ +### Meta information about the ground-truth poses + +The following meta information about the ground-truth poses is provided in files +*scene_gt_info.json* (calculated using *scripts/calc_gt_info.py*, with delta = +5mm for ITODD and 15mm for other datasets): + +* *bbox\_obj* - 2D bounding box of the object silhouette given by (x, y, width, + height), where (x, y) is the top-left corner of the bounding box. +* *bbox\_visib* - 2D bounding box of the visible part of the object silhouette. +* *px\_count\_all* - Number of pixels in the object silhouette. +* *px\_count\_valid* - Number of pixels in the object silhouette with a valid + depth measurement (i.e. with a non-zero value in the depth image). +* *px\_count\_visib* - Number of pixels in the visible part of the object + silhouette. +* *visib\_fract* - The visible fraction of the object silhouette (= *px\_count\_visib*/*px\_count +_all*). + + +## Acquisition of training images + +Most of the datasets include training images which were obtained either by +capturing real objects from various viewpoints or by rendering 3D object models +(using *scripts/render_train_imgs.py*). + +The viewpoints, from which the objects were rendered, were sampled from a view +sphere as in [2] by recursively subdividing an icosahedron. The level of +subdivision at which a viewpoint was added is saved in *scene_camera.json* as +*view_level* (viewpoints corresponding to vertices of the icosahedron have +*view_level* = 0, viewpoints obtained in the first subdivision step have +*view_level* = 1, etc.). To reduce the number of viewpoints while preserving +their "uniform" distribution over the sphere surface, one can consider only +viewpoints with *view_level* <= n, where n is the highest considered level of +subdivision. + +For rendering, the radius of the view sphere was set to the distance of the +closest occurrence of any annotated object instance over all test images. The +distance was calculated from the camera center to the origin of the model +coordinate system. + + +## 3D object models + +The 3D object models are provided in PLY (ascii) format. All models include +vertex normals. Most of the models include also vertex color or vertex texture +coordinates with the texture saved as a separate image. +The vertex normals were calculated using +[MeshLab](http://meshlab.sourceforge.net/) as the angle-weighted sum of face +normals incident to a vertex [4]. + +Each folder with object models contains file *models_info.json*, which includes +the 3D bounding box and the diameter for each object model. The diameter is +calculated as the largest distance between any pair of model vertices. + + +## Coordinate systems + +All coordinate systems (model, camera, world) are right-handed. +In the model coordinate system, the Z axis points up (when the object is +standing "naturally up-right") and the origin coincides with the center of the +3D bounding box of the object model. +The camera coordinate system is as in +[OpenCV](http://docs.opencv.org/2.4/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html) +with the camera looking along the Z axis. + + +## Units + +* Depth images: See files *camera.json/scene_camera.json* in individual + datasets. +* 3D object models: 1 mm +* Translation vectors: 1 mm + + +## References + +[1] Hodan, Michel et al. "BOP: Benchmark for 6D Object Pose Estimation" ECCV'18. + +[2] Hinterstoisser et al. "Model based training, detection and pose estimation + of texture-less 3d objects in heavily cluttered scenes" ACCV'12. 
+ +[3] Thurrner and Wuthrich "Computing vertex normals from polygonal + facets" Journal of Graphics Tools 3.1 (1998). diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..1d26e40 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,11 @@ +numpy==1.16.4 +matplotlib==2.2.4 +imageio==2.5.0 +scipy==1.2.2 +pypng==0.0.19 +Cython==0.29.10 +PyOpenGL==3.1.0 +triangle==20190115.2 +glumpy==1.1.0 +opencv-python==4.1.0.25 +Pillow==6.0.0 diff --git a/scripts/calc_gt_distribution.py b/scripts/calc_gt_distribution.py new file mode 100644 index 0000000..7bb5aa3 --- /dev/null +++ b/scripts/calc_gt_distribution.py @@ -0,0 +1,121 @@ +# Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) +# Center for Machine Perception, Czech Technical University in Prague + +"""Calculates distribution of GT poses.""" + +import math +import numpy as np +import matplotlib.pyplot as plt + +from bop_toolkit_lib import config +from bop_toolkit_lib import dataset_params +from bop_toolkit_lib import inout +from bop_toolkit_lib import misc + + +# PARAMETERS. +################################################################################ +p = { + # See dataset_params.py for options. + 'dataset': 'lmo', + + # Dataset split. Options: 'train', 'val', 'test'. + 'dataset_split': 'test', + + # Folder containing the BOP datasets. + 'datasets_path': config.datasets_path, +} +################################################################################ + + +# Load dataset parameters. +dp_split = dataset_params.get_split_params( + p['datasets_path'], p['dataset'], p['dataset_split']) + +scene_ids = dp_split['scene_ids'] +dists = [] +azimuths = [] +elevs = [] +visib_fracts = [] +ims_count = 0 +for scene_id in scene_ids: + misc.log('Processing - dataset: {} {}, scene: {}'.format( + p['dataset'], p['dataset_split'], scene_id)) + + # Load GT poses. + scene_gt = inout.load_scene_gt( + dp_split['scene_gt_tpath'].format(scene_id=scene_id)) + + # Load info about the GT poses. + scene_gt_info = inout.load_json( + dp_split['scene_gt_info_tpath'].format(scene_id=scene_id), keys_to_int=True) + + ims_count += len(scene_gt) + + for im_id in scene_gt.keys(): + for gt_id, im_gt in enumerate(scene_gt[im_id]): + + # Object distance. + dist = np.linalg.norm(im_gt['cam_t_m2c']) + dists.append(dist) + + # Camera origin in the model coordinate system. + cam_orig_m = -np.linalg.inv(im_gt['cam_R_m2c']).dot( + im_gt['cam_t_m2c']) + + # Azimuth from [0, 360]. + azimuth = math.atan2(cam_orig_m[1, 0], cam_orig_m[0, 0]) + if azimuth < 0: + azimuth += 2.0 * math.pi + azimuths.append((180.0 / math.pi) * azimuth) + + # Elevation from [-90, 90]. + a = np.linalg.norm(cam_orig_m) + b = np.linalg.norm([cam_orig_m[0, 0], cam_orig_m[1, 0], 0]) + elev = math.acos(b / a) + if cam_orig_m[2, 0] < 0: + elev = -elev + elevs.append((180.0 / math.pi) * elev) + + # Visibility fraction. + visib_fracts.append(scene_gt_info[im_id][gt_id]['visib_fract']) + +# Print stats. 
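+# (Distances are in the units of the translation vectors, i.e. millimeters;
+# azimuth and elevation are in degrees; visibility is a fraction in [0, 1].)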
+misc.log('Stats of the GT poses in dataset {} {}:'.format( + p['dataset'], p['dataset_split'])) +misc.log('Number of images: ' + str(ims_count)) + +misc.log('Min dist: {}'.format(np.min(dists))) +misc.log('Max dist: {}'.format(np.max(dists))) +misc.log('Mean dist: {}'.format(np.mean(dists))) + +misc.log('Min azimuth: {}'.format(np.min(azimuths))) +misc.log('Max azimuth: {}'.format(np.max(azimuths))) +misc.log('Mean azimuth: {}'.format(np.mean(azimuths))) + +misc.log('Min elev: {}'.format(np.min(elevs))) +misc.log('Max elev: {}'.format(np.max(elevs))) +misc.log('Mean elev: {}'.format(np.mean(elevs))) + +misc.log('Min visib fract: {}'.format(np.min(visib_fracts))) +misc.log('Max visib fract: {}'.format(np.max(visib_fracts))) +misc.log('Mean visib fract: {}'.format(np.mean(visib_fracts))) + +# Visualize distributions. +plt.figure() +plt.hist(dists, bins=100) +plt.title('Object distance') + +plt.figure() +plt.hist(azimuths, bins=100) +plt.title('Azimuth') + +plt.figure() +plt.hist(elevs, bins=100) +plt.title('Elevation') + +plt.figure() +plt.hist(visib_fracts, bins=100) +plt.title('Visibility fraction') + +plt.show() diff --git a/scripts/calc_gt_info.py b/scripts/calc_gt_info.py new file mode 100644 index 0000000..28aeba7 --- /dev/null +++ b/scripts/calc_gt_info.py @@ -0,0 +1,172 @@ +# Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) +# Center for Machine Perception, Czech Technical University in Prague + +"""Calculates visibility, 2D bounding boxes etc. for the ground-truth poses. + +See docs/bop_datasets_format.md for documentation of the calculated info. + +The info is saved in folder "{train,val,test}_gt_info" in the main folder of the +selected dataset. +""" + +import os +import numpy as np + +from bop_toolkit_lib import config +from bop_toolkit_lib import dataset_params +from bop_toolkit_lib import inout +from bop_toolkit_lib import misc +from bop_toolkit_lib import renderer +from bop_toolkit_lib import visibility + + +# PARAMETERS. +################################################################################ +p = { + # See dataset_params.py for options. + 'dataset': 'lm', + + # Dataset split. Options: 'train', 'val', 'test'. + 'dataset_split': 'test', + + # Dataset split type. None = default. See dataset_params.py for options. + 'dataset_split_type': None, + + # Whether to save visualizations of visibility masks. + 'vis_visibility_masks': False, + + # Tolerance used in the visibility test [mm]. + 'delta': 15, + + # Type of the renderer. + 'renderer_type': 'python', # Options: 'cpp', 'python'. + + # Folder containing the BOP datasets. + 'datasets_path': config.datasets_path, + + # Path template for output images with object masks. + 'vis_mask_visib_tpath': os.path.join( + config.output_path, 'vis_gt_visib_delta={delta}', + 'vis_gt_visib_delta={delta}', '{dataset}', '{split}', '{scene_id:06d}', + '{im_id:06d}_{gt_id:06d}.jpg'), +} +################################################################################ + + +if p['vis_visibility_masks']: + from bop_toolkit_lib import visualization + +# Load dataset parameters. +dp_split = dataset_params.get_split_params( + p['datasets_path'], p['dataset'], p['dataset_split'], p['dataset_split_type']) + +model_type = None +if p['dataset'] == 'tless': + model_type = 'cad' +dp_model = dataset_params.get_model_params( + p['datasets_path'], p['dataset'], model_type) + +# Initialize a renderer. 
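+# A depth-only renderer is sufficient here, since only distance images are
+# needed for the visibility test and for the 2D bounding boxes.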
+misc.log('Initializing renderer...') +width, height = dp_split['im_size'] +ren = renderer.create_renderer( + width, height, p['renderer_type'], mode='depth') +for obj_id in dp_model['obj_ids']: + ren.add_object(obj_id, dp_model['model_tpath'].format(obj_id=obj_id)) + +for scene_id in dp_split['scene_ids']: + + # Load scene info and ground-truth poses. + scene_camera = inout.load_scene_camera( + dp_split['scene_camera_tpath'].format(scene_id=scene_id)) + scene_gt = inout.load_scene_gt( + dp_split['scene_gt_tpath'].format(scene_id=scene_id)) + + scene_gt_info = {} + im_ids = sorted(scene_gt.keys()) + for im_counter, im_id in enumerate(im_ids): + if im_counter % 100 == 0: + misc.log( + 'Calculating GT info - dataset: {} ({}, {}), scene: {}, im: {}'.format( + p['dataset'], p['dataset_split'], p['dataset_split_type'], scene_id, + im_id)) + + # Load depth image. + depth = inout.load_depth(dp_split['depth_tpath'].format( + scene_id=scene_id, im_id=im_id)) + depth *= scene_camera[im_id]['depth_scale'] # Convert to [mm]. + + K = scene_camera[im_id]['cam_K'] + fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2] + im_size = (depth.shape[1], depth.shape[0]) + + scene_gt_info[im_id] = [] + for gt_id, gt in enumerate(scene_gt[im_id]): + + # Render depth image of the object model in the ground-truth pose. + depth_gt = ren.render_object( + gt['obj_id'], gt['cam_R_m2c'], gt['cam_t_m2c'], fx, fy, cx, cy)['depth'] + + # Convert depth images to distance images. + dist_gt = misc.depth_im_to_dist_im(depth_gt, K) + dist_im = misc.depth_im_to_dist_im(depth, K) + + # Estimation of the visibility mask. + visib_gt = visibility.estimate_visib_mask_gt( + dist_im, dist_gt, p['delta'], visib_mode='bop19') + + # Visible surface fraction. + obj_mask_gt = dist_gt > 0 + px_count_valid = np.sum(dist_im[obj_mask_gt] > 0) + px_count_visib = visib_gt.sum() + px_count_all = obj_mask_gt.sum() + if px_count_all > 0: + visib_fract = px_count_visib / float(px_count_all) + else: + visib_fract = 0.0 + + # Bounding box of the object projection. + bbox = [-1, -1, -1, -1] + if px_count_visib > 0: + ys, xs = obj_mask_gt.nonzero() + bbox = misc.calc_2d_bbox(xs, ys, im_size) + + # Bounding box of the visible surface part. + bbox_visib = [-1, -1, -1, -1] + if px_count_visib > 0: + ys, xs = visib_gt.nonzero() + bbox_visib = misc.calc_2d_bbox(xs, ys, im_size) + + # Store the calculated info. + scene_gt_info[im_id].append({ + 'px_count_all': int(px_count_all), + 'px_count_visib': int(px_count_visib), + 'px_count_valid': int(px_count_valid), + 'visib_fract': float(visib_fract), + 'bbox_obj': [int(e) for e in bbox], + 'bbox_visib': [int(e) for e in bbox_visib] + }) + + # Visualization of the visibility mask. + if p['vis_visibility_masks']: + + depth_im_vis = visualization.depth_for_vis(depth, 0.2, 1.0) + depth_im_vis = np.dstack([depth_im_vis] * 3) + + visib_gt_vis = visib_gt.astype(np.float) + zero_ch = np.zeros(visib_gt_vis.shape) + visib_gt_vis = np.dstack([zero_ch, visib_gt_vis, zero_ch]) + + vis = 0.5 * depth_im_vis + 0.5 * visib_gt_vis + vis[vis > 1] = 1 + + vis_path = p['vis_mask_visib_tpath'].format( + delta=p['delta'], dataset=p['dataset'], split=p['dataset_split'], + scene_id=scene_id, im_id=im_id, gt_id=gt_id) + misc.ensure_dir(os.path.dirname(vis_path)) + inout.save_im(vis_path, vis) + + # Save the info for the current scene. 
+ scene_gt_info_path = dp_split['scene_gt_info_tpath'].format(scene_id=scene_id) + misc.ensure_dir(os.path.dirname(scene_gt_info_path)) + inout.save_json(scene_gt_info_path, scene_gt_info) diff --git a/scripts/calc_gt_masks.py b/scripts/calc_gt_masks.py new file mode 100644 index 0000000..e88d2dc --- /dev/null +++ b/scripts/calc_gt_masks.py @@ -0,0 +1,126 @@ +# Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) +# Center for Machine Perception, Czech Technical University in Prague + +"""Calculates masks of object models in the ground-truth poses.""" + +import os +import numpy as np + +from bop_toolkit_lib import config +from bop_toolkit_lib import dataset_params +from bop_toolkit_lib import inout +from bop_toolkit_lib import misc +from bop_toolkit_lib import renderer +from bop_toolkit_lib import visibility + + +# PARAMETERS. +################################################################################ +p = { + # See dataset_params.py for options. + 'dataset': 'lm', + + # Dataset split. Options: 'train', 'val', 'test'. + 'dataset_split': 'test', + + # Dataset split type. None = default. See dataset_params.py for options. + 'dataset_split_type': None, + + # Tolerance used in the visibility test [mm]. + 'delta': 15, # 5 for ITODD, 15 for the other datasets. + + # Type of the renderer. + 'renderer_type': 'python', # Options: 'cpp', 'python'. + + # Folder containing the BOP datasets. + 'datasets_path': config.datasets_path, +} +################################################################################ + + +# Load dataset parameters. +dp_split = dataset_params.get_split_params( + p['datasets_path'], p['dataset'], p['dataset_split'], p['dataset_split_type']) + +model_type = None +if p['dataset'] == 'tless': + model_type = 'cad' +dp_model = dataset_params.get_model_params( + p['datasets_path'], p['dataset'], model_type) + +for scene_id in dp_split['scene_ids']: + + # Load scene GT. + scene_gt_path = dp_split['scene_gt_tpath'].format( + scene_id=scene_id) + scene_gt = inout.load_scene_gt(scene_gt_path) + + # Load scene camera. + scene_camera_path = dp_split['scene_camera_tpath'].format( + scene_id=scene_id) + scene_camera = inout.load_scene_camera(scene_camera_path) + + # Create folders for the output masks (if they do not exist yet). + mask_dir_path = os.path.dirname( + dp_split['mask_tpath'].format( + scene_id=scene_id, im_id=0, gt_id=0)) + misc.ensure_dir(mask_dir_path) + + mask_visib_dir_path = os.path.dirname( + dp_split['mask_visib_tpath'].format( + scene_id=scene_id, im_id=0, gt_id=0)) + misc.ensure_dir(mask_visib_dir_path) + + # Initialize a renderer. + misc.log('Initializing renderer...') + width, height = dp_split['im_size'] + ren = renderer.create_renderer( + width, height, renderer_type=p['renderer_type'], mode='depth') + + # Add object models. + for obj_id in dp_model['obj_ids']: + ren.add_object(obj_id, dp_model['model_tpath'].format(obj_id=obj_id)) + + im_ids = sorted(scene_gt.keys()) + for im_id in im_ids: + + if im_id % 100 == 0: + misc.log( + 'Calculating masks - dataset: {} ({}, {}), scene: {}, im: {}'.format( + p['dataset'], p['dataset_split'], p['dataset_split_type'], scene_id, + im_id)) + + K = scene_camera[im_id]['cam_K'] + fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2] + + # Load depth image. 
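+    # The stored depth values are multiplied by depth_scale to get depth in
+    # millimeters, the unit in which the tolerance delta is specified.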
+ depth_path = dp_split['depth_tpath'].format( + scene_id=scene_id, im_id=im_id) + depth_im = inout.load_depth(depth_path) + depth_im *= scene_camera[im_id]['depth_scale'] # to [mm] + dist_im = misc.depth_im_to_dist_im(depth_im, K) + + for gt_id, gt in enumerate(scene_gt[im_id]): + + # Render the depth image. + depth_gt = ren.render_object( + gt['obj_id'], gt['cam_R_m2c'], gt['cam_t_m2c'], fx, fy, cx, cy)['depth'] + + # Convert depth image to distance image. + dist_gt = misc.depth_im_to_dist_im(depth_gt, K) + + # Mask of the full object silhouette. + mask = dist_gt > 0 + + # Mask of the visible part of the object silhouette. + mask_visib = visibility.estimate_visib_mask_gt( + dist_im, dist_gt, p['delta'], visib_mode='bop19') + + # Save the calculated masks. + mask_path = dp_split['mask_tpath'].format( + scene_id=scene_id, im_id=im_id, gt_id=gt_id) + inout.save_im(mask_path, 255 * mask.astype(np.uint8)) + + mask_visib_path = dp_split['mask_visib_tpath'].format( + scene_id=scene_id, im_id=im_id, gt_id=gt_id) + inout.save_im(mask_visib_path, 255 * mask_visib.astype(np.uint8)) diff --git a/scripts/calc_model_info.py b/scripts/calc_model_info.py new file mode 100644 index 0000000..71e7b31 --- /dev/null +++ b/scripts/calc_model_info.py @@ -0,0 +1,54 @@ +# Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) +# Center for Machine Perception, Czech Technical University in Prague + +"""Calculates the 3D bounding box and the diameter of 3D object models.""" + +from bop_toolkit_lib import config +from bop_toolkit_lib import dataset_params +from bop_toolkit_lib import inout +from bop_toolkit_lib import misc + + +# PARAMETERS. +################################################################################ +p = { + # See dataset_params.py for options. + 'dataset': 'lm', + + # Type of input object models. + 'model_type': 'eval', + + # Folder containing the BOP datasets. + 'datasets_path': config.datasets_path, +} +################################################################################ + + +# Load dataset parameters. +dp_split = dataset_params.get_split_params( + p['datasets_path'], p['dataset'], 'train') + +dp_model = dataset_params.get_model_params( + p['datasets_path'], p['dataset'], p['model_type']) + +models_info = {} +for obj_id in dp_model['obj_ids']: + misc.log('Processing model of object {}...'.format(obj_id)) + + model = inout.load_ply(dp_model['model_tpath'].format(obj_id=obj_id)) + + # Calculate 3D bounding box. + ref_pt = map(float, model['pts'].min(axis=0).flatten()) + size = map(float, (model['pts'].max(axis=0) - ref_pt).flatten()) + + # Calculated diameter. + diameter = misc.calc_pts_diameter(model['pts']) + + models_info[obj_id] = { + 'min_x': ref_pt[0], 'min_y': ref_pt[1], 'min_z': ref_pt[2], + 'size_x': size[0], 'size_y': size[1], 'size_z': size[2], + 'diameter': diameter + } + +# Save the calculated info about the object models. +inout.save_json(dp_split['models_info_path'], models_info) diff --git a/scripts/check_results_bop19.py b/scripts/check_results_bop19.py new file mode 100644 index 0000000..aca7d2f --- /dev/null +++ b/scripts/check_results_bop19.py @@ -0,0 +1,46 @@ +# Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) +# Center for Machine Perception, Czech Technical University in Prague + +"""Evaluation script for the BOP Challenge 2019.""" + +import os +import argparse + +from bop_toolkit_lib import config +from bop_toolkit_lib import inout +from bop_toolkit_lib import misc + + +# PARAMETERS (some can be overwritten by the command line arguments below). 
+################################################################################ +p = { + # Names of files with results for which to calculate the errors (assumed to be + # stored in folder config.eval_path). See docs/bop_challenge_2019.md for a + # description of the format. Example results can be found at: + # http://ptak.felk.cvut.cz/6DB/public/bop_sample_results/bop_challenge_2019/ + 'result_filenames': [ + '/path/to/csv/with/results', + ], +} +################################################################################ + + +# Command line arguments. +# ------------------------------------------------------------------------------ +parser = argparse.ArgumentParser() +parser.add_argument('--result_filenames', + default=','.join(p['result_filenames']), + help='Comma-separated names of files with results.') +args = parser.parse_args() + +p['result_filenames'] = args.result_filenames.split(',') + + +if __name__ == '__main__': + + for result_filename in p['result_filenames']: + result_path = os.path.join(config.results_path, result_filename) + check_passed, check_msg = inout.check_bop_results( + result_path, version='bop19') + + misc.log('Check msg: {}'.format(check_msg)) diff --git a/scripts/eval_bop19.py b/scripts/eval_bop19.py new file mode 100644 index 0000000..b0b4934 --- /dev/null +++ b/scripts/eval_bop19.py @@ -0,0 +1,192 @@ +# Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) +# Center for Machine Perception, Czech Technical University in Prague + +"""Evaluation script for the BOP Challenge 2019.""" + +import os +import time +import argparse +import subprocess +import numpy as np + +from bop_toolkit_lib import config +from bop_toolkit_lib import inout +from bop_toolkit_lib import misc + + +# PARAMETERS (some can be overwritten by the command line arguments below). +################################################################################ +p = { + # Errors to calculate. + 'errors': [ + { + 'n_top': -1, + 'type': 'vsd', + 'vsd_deltas': { + 'hb': 15, + 'icbin': 15, + 'icmi': 15, + 'itodd': 5, + 'lm': 15, + 'lmo': 15, + 'ruapc': 15, + 'tless': 15, + 'tudl': 15, + 'tyol': 15, + }, + 'vsd_taus': list(np.arange(0.05, 0.51, 0.05)), + 'correct_th': [[th] for th in np.arange(0.05, 0.51, 0.05)] + }, + { + 'n_top': -1, + 'type': 'mssd', + 'correct_th': [[th] for th in np.arange(0.05, 0.51, 0.05)] + }, + { + 'n_top': -1, + 'type': 'mspd', + 'correct_th': [[th] for th in np.arange(5, 51, 5)] + }, + ], + + # Minimum visible surface fraction of a valid GT pose. + 'visib_gt_min': 0.1, + + # See misc.get_symmetry_transformations(). + 'max_sym_disc_step': 0.01, + + # Type of the renderer (used for the VSD pose error function). + 'renderer_type': 'python', # Options: 'cpp', 'python'. + + # Names of files with results for which to calculate the errors (assumed to be + # stored in folder config.eval_path). See docs/bop_challenge_2019.md for a + # description of the format. Example results can be found at: + # http://ptak.felk.cvut.cz/6DB/public/bop_sample_results/bop_challenge_2019/ + 'result_filenames': [ + '/path/to/csv/with/results', + ], + + # File with a list of estimation targets to consider. The file is assumed to + # be stored in the dataset folder. + 'targets_filename': 'test_targets_bop19.json', +} +################################################################################ + + +# Command line arguments. 
+# ------------------------------------------------------------------------------ +parser = argparse.ArgumentParser() +parser.add_argument('--visib_gt_min', default=p['visib_gt_min']) +parser.add_argument('--max_sym_disc_step', default=p['max_sym_disc_step']) +parser.add_argument('--renderer_type', default=p['renderer_type']) +parser.add_argument('--result_filenames', + default=','.join(p['result_filenames']), + help='Comma-separated names of files with results.') +parser.add_argument('--targets_filename', default=p['targets_filename']) +args = parser.parse_args() + +p['visib_gt_min'] = float(args.visib_gt_min) +p['max_sym_disc_step'] = float(args.max_sym_disc_step) +p['renderer_type'] = str(args.renderer_type) +p['result_filenames'] = args.result_filenames.split(',') +p['targets_filename'] = str(args.targets_filename) + +# Evaluation. +# ------------------------------------------------------------------------------ +for result_filename in p['result_filenames']: + + misc.log('===========') + misc.log('EVALUATING: {}'.format(result_filename)) + misc.log('===========') + + time_start = time.time() + + for error in p['errors']: + + # Calculate error of the pose estimates. + calc_errors_cmd = [ + 'python', + os.path.join('scripts', 'eval_calc_errors.py'), + '--n_top={}'.format(error['n_top']), + '--error_type={}'.format(error['type']), + '--result_filenames={}'.format(result_filename), + '--renderer_type={}'.format(p['renderer_type']), + '--targets_filename={}'.format(p['targets_filename']), + '--max_sym_disc_step={}'.format(p['max_sym_disc_step']), + '--skip_missing=1', + ] + if error['type'] == 'vsd': + vsd_deltas_str = \ + ','.join(['{}:{}'.format(k, v) for k, v in error['vsd_deltas'].items()]) + calc_errors_cmd += [ + '--vsd_deltas={}'.format(vsd_deltas_str), + '--vsd_taus={}'.format(','.join(map(str, error['vsd_taus']))) + ] + + misc.log('Running: ' + ' '.join(calc_errors_cmd)) + if subprocess.call(calc_errors_cmd) != 0: + raise RuntimeError('Calculation of VSD failed.') + + # Name of the result and the dataset. + result_name = os.path.splitext(os.path.basename(result_filename))[0] + dataset = str(result_name.split('_')[1].split('-')[0]) + + # Paths (rel. to config.eval_path) to folders with calculated pose errors. + # For VSD, there is one path for each setting of tau. For the other pose + # error functions, there is only one path. + error_dir_paths = {} + if error['type'] == 'vsd': + for vsd_tau in error['vsd_taus']: + error_sign = misc.get_error_signature( + error['type'], error['n_top'], vsd_delta=error['vsd_deltas'][dataset], + vsd_tau=vsd_tau) + error_dir_paths[error_sign] = os.path.join(result_name, error_sign) + else: + error_sign = misc.get_error_signature(error['type'], error['n_top']) + error_dir_paths[error_sign] = os.path.join(result_name, error_sign) + + # Recall scores for all settings of the threshold of correctness (and also + # of the misalignment tolerance tau in the case of VSD). + recalls = [] + + # Calculate performance scores. 
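+    # A recall score is computed for every combination of an error folder (one
+    # per tau in the case of VSD) and a threshold of correctness; the final
+    # score is the average of these recalls, i.e. the area under the recall
+    # surface.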
+ for error_sign, error_dir_path in error_dir_paths.items(): + for correct_th in error['correct_th']: + + calc_scores_cmd = [ + 'python', + os.path.join('scripts', 'eval_calc_scores.py'), + '--error_dir_paths={}'.format(error_dir_path), + '--targets_filename={}'.format(p['targets_filename']), + '--visib_gt_min={}'.format(p['visib_gt_min']) + ] + + calc_scores_cmd += ['--correct_th_{}={}'.format( + error['type'], ','.join(map(str, correct_th)))] + + misc.log('Running: ' + ' '.join(calc_scores_cmd)) + if subprocess.call(calc_scores_cmd) != 0: + raise RuntimeError('Calculation of scores failed.') + + # Path to file with calculated scores. + score_sign = misc.get_score_signature(correct_th, p['visib_gt_min']) + + scores_filename = 'scores_{}.json'.format(score_sign) + scores_path = os.path.join( + config.eval_path, result_name, error_sign, scores_filename) + + # Load the scores. + misc.log('Loading calculated scores from: {}'.format(scores_path)) + scores = inout.load_json(scores_path) + recalls.append(scores['total_recall']) + + # Area under the recall surface. + aurs = np.mean(recalls) + + misc.log('Recall scores: {}'.format(' '.join(map(str, recalls)))) + misc.log('Area under the recall surface: {}'.format(aurs)) + + time_total = time.time() - time_start + misc.log('Evaluation of {} took {}s.'.format(result_filename, time_total)) + +misc.log('Done.') diff --git a/scripts/eval_calc_errors.py b/scripts/eval_calc_errors.py new file mode 100644 index 0000000..530ff8e --- /dev/null +++ b/scripts/eval_calc_errors.py @@ -0,0 +1,407 @@ +# Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) +# Center for Machine Perception, Czech Technical University in Prague + +"""Calculates error of 6D object pose estimates.""" + +import os +import time +import argparse +import copy +import numpy as np + +from bop_toolkit_lib import config +from bop_toolkit_lib import dataset_params +from bop_toolkit_lib import inout +from bop_toolkit_lib import misc +from bop_toolkit_lib import pose_error +from bop_toolkit_lib import renderer + + +# PARAMETERS (can be overwritten by the command line arguments below). +################################################################################ +p = { + # Top N pose estimates (with the highest score) to be evaluated for each + # object class in each image. + # Options: 0 = all, -1 = given by the number of GT poses. + 'n_top': 1, + + # Pose error function. + # Options: 'vsd', 'mssd', 'mspd', 'ad', 'adi', 'add', 'cus', 're', 'te, etc. + 'error_type': 'mspd', + + # VSD parameters. + 'vsd_deltas': { + 'hb': 15, + 'icbin': 15, + 'icmi': 15, + 'itodd': 5, + 'lm': 15, + 'lmo': 15, + 'ruapc': 15, + 'tless': 15, + 'tudl': 15, + 'tyol': 15, + }, + 'vsd_taus': list(np.arange(0.05, 0.51, 0.05)), + 'vsd_normalized_by_diameter': True, + + # MSSD/MSPD parameters (see misc.get_symmetry_transformations). + 'max_sym_disc_step': 0.01, + + # Whether to ignore/break if some errors are missing. + 'skip_missing': True, + + # Type of the renderer (used for the VSD pose error function). + 'renderer_type': 'python', # Options: 'cpp', 'python'. + + # Names of files with results for which to calculate the errors (assumed to be + # stored in folder config.eval_path). See docs/bop_challenge_2019.md for a + # description of the format. Example results can be found at: + # http://ptak.felk.cvut.cz/6DB/public/bop_sample_results/bop_challenge_2019/ + 'result_filenames': [ + '/path/to/csv/with/results', + ], + + # Folder containing the BOP datasets. 
+ 'datasets_path': config.datasets_path, + + # File with a list of estimation targets to consider. The file is assumed to + # be stored in the dataset folder. + 'targets_filename': 'test_targets_bop19.json', + + # Template of path to the output file with calculated errors. + 'out_errors_tpath': os.path.join( + config.eval_path, '{result_name}', '{error_sign}', + 'errors_{scene_id:06d}.json') +} +################################################################################ + + +# Command line arguments. +# ------------------------------------------------------------------------------ +vsd_deltas_str =\ + ','.join(['{}:{}'.format(k, v) for k, v in p['vsd_deltas'].items()]) + +parser = argparse.ArgumentParser() +parser.add_argument('--n_top', default=p['n_top']) +parser.add_argument('--error_type', default=p['error_type']) +parser.add_argument('--vsd_deltas', default=vsd_deltas_str) +parser.add_argument('--vsd_taus', default=','.join(map(str, p['vsd_taus']))) +parser.add_argument('--vsd_normalized_by_diameter', + default=p['vsd_normalized_by_diameter']) +parser.add_argument('--max_sym_disc_step', default=p['max_sym_disc_step']) +parser.add_argument('--skip_missing', default=p['skip_missing']) +parser.add_argument('--renderer_type', default=p['renderer_type']) +parser.add_argument('--result_filenames', + default=','.join(p['result_filenames']), + help='Comma-separated names of files with results.') +parser.add_argument('--datasets_path', default=p['datasets_path']) +parser.add_argument('--targets_filename', default=p['targets_filename']) +parser.add_argument('--out_errors_tpath', default=p['out_errors_tpath']) +args = parser.parse_args() + +p['n_top'] = int(args.n_top) +p['error_type'] = str(args.error_type) +p['vsd_deltas'] = {str(e.split(':')[0]): float(e.split(':')[1]) + for e in args.vsd_deltas.split(',')} +p['vsd_taus'] = map(float, args.vsd_taus.split(',')) +p['vsd_normalized_by_diameter'] = bool(args.vsd_normalized_by_diameter) +p['max_sym_disc_step'] = bool(args.max_sym_disc_step) +p['skip_missing'] = bool(args.skip_missing) +p['renderer_type'] = str(args.renderer_type) +p['result_filenames'] = args.result_filenames.split(',') +p['datasets_path'] = str(args.datasets_path) +p['targets_filename'] = str(args.targets_filename) +p['out_errors_tpath'] = str(args.out_errors_tpath) + +misc.log('-----------') +misc.log('Parameters:') +for k, v in p.items(): + misc.log('- {}: {}'.format(k, v)) +misc.log('-----------') + +# Error calculation. +# ------------------------------------------------------------------------------ +for result_filename in p['result_filenames']: + misc.log('Processing: {}'.format(result_filename)) + + ests_counter = 0 + time_start = time.time() + + # Parse info about the method and the dataset from the filename. + result_name = os.path.splitext(os.path.basename(result_filename))[0] + result_info = result_name.split('_') + method = str(result_info[0]) + dataset_info = result_info[1].split('-') + dataset = str(dataset_info[0]) + split = str(dataset_info[1]) + split_type = str(dataset_info[2]) if len(dataset_info) > 2 else None + split_type_str = ' - ' + split_type if split_type is not None else '' + + # Load dataset parameters. + dp_split = dataset_params.get_split_params( + p['datasets_path'], dataset, split, split_type) + + model_type = 'eval' + dp_model = dataset_params.get_model_params( + p['datasets_path'], dataset, model_type) + + # Load object models. 
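+  # The model point clouds are needed only for the errors computed directly on
+  # the model points (AD/ADD/ADI, MSSD, MSPD); VSD and CUS need only the
+  # renderer and the info about the models.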
+ models = {} + if p['error_type'] in ['ad', 'add', 'adi', 'mssd', 'mspd']: + misc.log('Loading object models...') + for obj_id in dp_model['obj_ids']: + models[obj_id] = inout.load_ply( + dp_model['model_tpath'].format(obj_id=obj_id)) + + # Load models info. + models_info = None + if p['error_type'] in ['ad', 'add', 'adi', 'vsd', 'mssd', 'mspd', 'cus']: + models_info = inout.load_json( + dp_model['models_info_path'], keys_to_int=True) + + # Get sets of symmetry transformations for the object models. + models_sym = None + if p['error_type'] in ['mssd', 'mspd']: + models_sym = {} + for obj_id in dp_model['obj_ids']: + models_sym[obj_id] = misc.get_symmetry_transformations( + models_info[obj_id], p['max_sym_disc_step']) + + # Initialize a renderer. + ren = None + if p['error_type'] in ['vsd', 'cus']: + misc.log('Initializing renderer...') + width, height = dp_split['im_size'] + ren = renderer.create_renderer( + width, height, p['renderer_type'], mode='depth') + for obj_id in dp_model['obj_ids']: + ren.add_object(obj_id, dp_model['model_tpath'].format(obj_id=obj_id)) + + # Load the estimation targets. + targets = inout.load_json( + os.path.join(dp_split['base_path'], p['targets_filename'])) + + # Organize the targets by scene, image and object. + misc.log('Organizing estimation targets...') + targets_org = {} + for target in targets: + targets_org.setdefault(target['scene_id'], {}).setdefault( + target['im_id'], {})[target['obj_id']] = target + + # Load pose estimates. + misc.log('Loading pose estimates...') + ests = inout.load_bop_results( + os.path.join(config.results_path, result_filename)) + + # Organize the pose estimates by scene, image and object. + misc.log('Organizing pose estimates...') + ests_org = {} + for est in ests: + ests_org.setdefault(est['scene_id'], {}).setdefault( + est['im_id'], {}).setdefault(est['obj_id'], []).append(est) + + for scene_id, scene_targets in targets_org.items(): + + # Load camera and GT poses for the current scene. + scene_camera = inout.load_scene_camera( + dp_split['scene_camera_tpath'].format(scene_id=scene_id)) + scene_gt = inout.load_scene_gt(dp_split['scene_gt_tpath'].format( + scene_id=scene_id)) + + scene_errs = [] + + for im_ind, (im_id, im_targets) in enumerate(scene_targets.items()): + + if im_ind % 10 == 0: + misc.log( + 'Calculating error {} - method: {}, dataset: {}{}, scene: {}, ' + 'im: {}'.format( + p['error_type'], method, dataset, split_type_str, scene_id, im_ind)) + + # Intrinsic camera matrix. + K = scene_camera[im_id]['cam_K'] + + # Load the depth image if VSD is selected as the pose error function. + depth_im = None + if p['error_type'] == 'vsd': + depth_path = dp_split['depth_tpath'].format( + scene_id=scene_id, im_id=im_id) + depth_im = inout.load_depth(depth_path) + depth_im *= scene_camera[im_id]['depth_scale'] # Convert to [mm]. + + for obj_id, target in im_targets.items(): + + # The required number of top estimated poses. + if p['n_top'] == 0: # All estimates are considered. + n_top_curr = None + elif p['n_top'] == -1: # Given by the number of GT poses. + # n_top_curr = sum([gt['obj_id'] == obj_id for gt in scene_gt[im_id]]) + n_top_curr = target['inst_count'] + else: + n_top_curr = p['n_top'] + + # Get the estimates. + try: + obj_ests = ests_org[scene_id][im_id][obj_id] + obj_count = len(obj_ests) + except KeyError: + obj_ests = [] + obj_count = 0 + + # Check the number of estimates. 
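+        # With n_top = -1, n_top_curr equals the instance count from the
+        # target file, so at least that many estimates are expected unless
+        # skip_missing is enabled.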
+ if not p['skip_missing'] and obj_count < n_top_curr: + raise ValueError( + 'Not enough estimates for scene: {}, im: {}, obj: {} ' + '(provided: {}, expected: {})'.format( + scene_id, im_id, obj_id, obj_count, n_top_curr)) + + # Sort the estimates by score (in descending order). + obj_ests_sorted = sorted( + enumerate(obj_ests), key=lambda x: x[1]['score'], reverse=True) + + # Select the required number of top estimated poses. + obj_ests_sorted = obj_ests_sorted[slice(0, n_top_curr)] + ests_counter += len(obj_ests_sorted) + + # Calculate error of each pose estimate w.r.t. all GT poses of the same + # object class. + for est_id, est in obj_ests_sorted: + + # Estimated pose. + R_e = est['R'] + t_e = est['t'] + + errs = {} # Errors w.r.t. GT poses of the same object class. + for gt_id, gt in enumerate(scene_gt[im_id]): + if gt['obj_id'] != obj_id: + continue + + # Ground-truth pose. + R_g = gt['cam_R_m2c'] + t_g = gt['cam_t_m2c'] + + # Check if the projections of the bounding spheres of the object in + # the two poses overlap (to speed up calculation of some errors). + sphere_projections_overlap = None + if p['error_type'] in ['vsd', 'cus']: + radius = 0.5 * models_info[obj_id]['diameter'] + sphere_projections_overlap = misc.overlapping_sphere_projections( + radius, t_e.squeeze(), t_g.squeeze()) + + # Check if the bounding spheres of the object in the two poses + # overlap (to speed up calculation of some errors). + spheres_overlap = None + if p['error_type'] in ['ad', 'add', 'adi', 'mssd']: + center_dist = np.linalg.norm(t_e - t_g) + spheres_overlap = center_dist < models_info[obj_id]['diameter'] + + if p['error_type'] == 'vsd': + if not sphere_projections_overlap: + e = [1.0] * len(p['vsd_taus']) + else: + e = pose_error.vsd( + R_e, t_e, R_g, t_g, depth_im, K, p['vsd_deltas'][dataset], + p['vsd_taus'], p['vsd_normalized_by_diameter'], + models_info[obj_id]['diameter'], ren, obj_id, 'step') + + elif p['error_type'] == 'mssd': + if not spheres_overlap: + e = [float('inf')] + else: + e = [pose_error.mssd( + R_e, t_e, R_g, t_g, models[obj_id]['pts'], + models_sym[obj_id])] + + elif p['error_type'] == 'mspd': + e = [pose_error.mspd( + R_e, t_e, R_g, t_g, K, models[obj_id]['pts'], + models_sym[obj_id])] + + elif p['error_type'] in ['ad', 'add', 'adi']: + if not spheres_overlap: + # Infinite error if the bounding spheres do not overlap. With + # typically used values of the correctness threshold for the AD + # error (e.g. k*diameter, where k = 0.1), such pose estimates + # would be considered incorrect anyway. + e = [float('inf')] + else: + if p['error_type'] == 'ad': + if obj_id in dp_model['symmetric_obj_ids']: + e = [pose_error.adi( + R_e, t_e, R_g, t_g, models[obj_id]['pts'])] + else: + e = [pose_error.add( + R_e, t_e, R_g, t_g, models[obj_id]['pts'])] + + elif p['error_type'] == 'add': + e = [pose_error.add( + R_e, t_e, R_g, t_g, models[obj_id]['pts'])] + + else: # 'adi' + e = [pose_error.adi( + R_e, t_e, R_g, t_g, models[obj_id]['pts'])] + + elif p['error_type'] == 'cus': + if sphere_projections_overlap: + e = [pose_error.cus( + R_e, t_e, R_g, t_g, K, ren, obj_id)] + else: + e = [1.0] + + elif p['error_type'] == 'rete': + e = [pose_error.re(R_e, R_g), pose_error.te(t_e, t_g)] + + elif p['error_type'] == 're': + e = [pose_error.re(R_e, R_g)] + + elif p['error_type'] == 'te': + e = [pose_error.te(t_e, t_g)] + + else: + raise ValueError('Unknown pose error function.') + + errs[gt_id] = e + + # Save the calculated errors. 
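+          # Each record stores the errors w.r.t. all GT poses of the same
+          # object in the image, keyed by the index of the GT pose (gt_id).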
+ scene_errs.append({ + 'im_id': im_id, + 'obj_id': obj_id, + 'est_id': est_id, + 'score': est['score'], + 'errors': errs + }) + + def save_errors(_error_sign, _scene_errs): + # Save the calculated errors to a JSON file. + errors_path = p['out_errors_tpath'].format( + result_name=result_name, error_sign=_error_sign, scene_id=scene_id) + misc.ensure_dir(os.path.dirname(errors_path)) + misc.log('Saving errors to: {}'.format(errors_path)) + inout.save_json(errors_path, _scene_errs) + + # Save the calculated errors. + if p['error_type'] == 'vsd': + + # For VSD, save errors for each tau value to a different file. + for vsd_tau_id, vsd_tau in enumerate(p['vsd_taus']): + error_sign = misc.get_error_signature( + p['error_type'], p['n_top'], vsd_delta=p['vsd_deltas'][dataset], + vsd_tau=vsd_tau) + + # Keep only errors for the current tau. + scene_errs_curr = copy.deepcopy(scene_errs) + for err in scene_errs_curr: + for gt_id in err['errors'].keys(): + err['errors'][gt_id] = [err['errors'][gt_id][vsd_tau_id]] + + save_errors(error_sign, scene_errs_curr) + else: + error_sign = misc.get_error_signature(p['error_type'], p['n_top']) + save_errors(error_sign, scene_errs) + + time_total = time.time() - time_start + misc.log('Calculation of errors for {} estimates took {}s.'.format( + ests_counter, time_total)) + +misc.log('Done.') diff --git a/scripts/eval_calc_scores.py b/scripts/eval_calc_scores.py new file mode 100644 index 0000000..c6d98e3 --- /dev/null +++ b/scripts/eval_calc_scores.py @@ -0,0 +1,256 @@ +# Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) +# Center for Machine Perception, Czech Technical University in Prague + +"""Calculates performance scores for 6D object pose estimation tasks. + +Errors of the pose estimates need to be pre-calculated with eval_calc_errors.py. + +Currently supported tasks (see [1]): +- SiSo (a single instance of a single object) + +For evaluation in the BOP paper [1], the following parameters were used: + - n_top = 1 + - visib_gt_min = 0.1 + - error_type = 'vsd' + - vsd_cost = 'step' + - vsd_delta = 15 + - vsd_tau = 20 + - correct_th['vsd'] = 0.3 + + [1] Hodan, Michel et al. BOP: Benchmark for 6D Object Pose Estimation, ECCV'18. +""" + +import os +import time +import argparse + +from bop_toolkit_lib import config +from bop_toolkit_lib import dataset_params +from bop_toolkit_lib import inout +from bop_toolkit_lib import misc +from bop_toolkit_lib import pose_matching +from bop_toolkit_lib import score + + +# PARAMETERS (can be overwritten by the command line arguments below). +################################################################################ +p = { + # Threshold of correctness for different pose error functions. + 'correct_th': { + 'vsd': [0.3], + 'mssd': [0.2], + 'mspd': [10], + 'cus': [0.5], + 'rete': [5.0, 5.0], # [deg, cm]. + 're': [5.0], # [deg]. + 'te': [5.0], # [cm]. + 'ad': [0.1], + 'add': [0.1], + 'adi': [0.1], + }, + + # Pose errors that will be normalized by object diameter before thresholding. + 'normalized_by_diameter': ['ad', 'add', 'adi', 'mssd'], + + # Pose errors that will be normalized the image width before thresholding. + 'normalized_by_im_width': ['mspd'], + + # Minimum visible surface fraction of a valid GT pose. + 'visib_gt_min': 0.1, + + # Paths (relative to config.eval_path) to folders with pose errors calculated + # using eval_calc_errors.py. + # Example: 'hodan-iros15_lm-test/error=vsd_ntop=1_delta=15_tau=20_cost=step' + 'error_dir_paths': [ + r'/path/to/calculated/errors', + ], + + # Folder containing the BOP datasets. 
+  'datasets_path': config.datasets_path,
+
+  # File with a list of estimation targets to consider. The file is assumed to
+  # be stored in the dataset folder.
+  'targets_filename': 'test_targets_bop19.json',
+
+  # Template of path to the input file with calculated errors.
+  'error_tpath': os.path.join(
+    config.eval_path, '{error_dir_path}', 'errors_{scene_id:06d}.json'),
+
+  # Template of path to the output file with established matches and calculated
+  # scores.
+  'out_matches_tpath': os.path.join(
+    config.eval_path, '{error_dir_path}', 'matches_{score_sign}.json'),
+  'out_scores_tpath': os.path.join(
+    config.eval_path, '{error_dir_path}', 'scores_{score_sign}.json'),
+}
+################################################################################
+
+
+# Command line arguments.
+# ------------------------------------------------------------------------------
+parser = argparse.ArgumentParser()
+
+# Define the command line arguments.
+for err_type in p['correct_th']:
+  parser.add_argument(
+    '--correct_th_' + err_type,
+    default=','.join(map(str, p['correct_th'][err_type])))
+
+parser.add_argument('--normalized_by_diameter',
+                    default=','.join(p['normalized_by_diameter']))
+parser.add_argument('--normalized_by_im_width',
+                    default=','.join(p['normalized_by_im_width']))
+parser.add_argument('--visib_gt_min', default=p['visib_gt_min'])
+parser.add_argument('--error_dir_paths', default=','.join(p['error_dir_paths']),
+                    help='Comma-sep. paths to errors from eval_calc_errors.py.')
+parser.add_argument('--datasets_path', default=p['datasets_path'])
+parser.add_argument('--targets_filename', default=p['targets_filename'])
+parser.add_argument('--error_tpath', default=p['error_tpath'])
+parser.add_argument('--out_matches_tpath', default=p['out_matches_tpath'])
+parser.add_argument('--out_scores_tpath', default=p['out_scores_tpath'])
+
+# Process the command line arguments.
+args = parser.parse_args()
+
+for err_type in p['correct_th']:
+  # Wrap in list() so the thresholds can be indexed and iterated repeatedly
+  # (in Python 3, map() returns a one-shot iterator).
+  p['correct_th'][err_type] = list(map(
+    float, args.__dict__['correct_th_' + err_type].split(',')))
+
+p['normalized_by_diameter'] = args.normalized_by_diameter.split(',')
+p['normalized_by_im_width'] = args.normalized_by_im_width.split(',')
+p['visib_gt_min'] = float(args.visib_gt_min)
+p['error_dir_paths'] = args.error_dir_paths.split(',')
+p['datasets_path'] = str(args.datasets_path)
+p['targets_filename'] = str(args.targets_filename)
+p['error_tpath'] = str(args.error_tpath)
+p['out_matches_tpath'] = str(args.out_matches_tpath)
+p['out_scores_tpath'] = str(args.out_scores_tpath)
+
+misc.log('-----------')
+misc.log('Parameters:')
+for k, v in p.items():
+  misc.log('- {}: {}'.format(k, v))
+misc.log('-----------')
+
+# Calculation of the performance scores.
+# ------------------------------------------------------------------------------
+for error_dir_path in p['error_dir_paths']:
+  misc.log('Processing: {}'.format(error_dir_path))
+
+  time_start = time.time()
+
+  # Parse info about the errors from the folder name.
+  error_sign = os.path.basename(error_dir_path)
+  err_type = str(error_sign.split('_')[0].split('=')[1])
+  n_top = int(error_sign.split('_')[1].split('=')[1])
+  result_info = os.path.basename(os.path.dirname(error_dir_path)).split('_')
+  method = result_info[0]
+  dataset_info = result_info[1].split('-')
+  dataset = dataset_info[0]
+  split = dataset_info[1]
+  split_type = dataset_info[2] if len(dataset_info) > 2 else None
+
+  # Evaluation signature.
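+  # The signature encodes the correctness threshold(s) and visib_gt_min and is
+  # used in the names of the output matches/scores files.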
+ score_sign = misc.get_score_signature( + p['correct_th'][err_type], p['visib_gt_min']) + + misc.log('Calculating score - error: {}, method: {}, dataset: {}.'.format( + err_type, method, dataset)) + + # Load dataset parameters. + dp_split = dataset_params.get_split_params( + p['datasets_path'], dataset, split, split_type) + + model_type = 'eval' + dp_model = dataset_params.get_model_params( + p['datasets_path'], dataset, model_type) + + # Load info about the object models. + models_info = inout.load_json(dp_model['models_info_path'], keys_to_int=True) + + # Load the estimation targets to consider. + targets = inout.load_json( + os.path.join(dp_split['base_path'], p['targets_filename'])) + scene_im_ids = {} + + # Organize the targets by scene, image and object. + misc.log('Organizing estimation targets...') + targets_org = {} + for target in targets: + targets_org.setdefault(target['scene_id'], {}).setdefault( + target['im_id'], {})[target['obj_id']] = target + + # Go through the test scenes and match estimated poses to GT poses. + # ---------------------------------------------------------------------------- + matches = [] # Stores info about the matching pose estimate for each GT pose. + for scene_id, scene_targets in targets_org.items(): + misc.log('Processing scene {} of {}...'.format(scene_id, dataset)) + + # Load GT poses for the current scene. + scene_gt = inout.load_scene_gt( + dp_split['scene_gt_tpath'].format(scene_id=scene_id)) + + # Load info about the GT poses (e.g. visibility) for the current scene. + scene_gt_info = inout.load_json( + dp_split['scene_gt_info_tpath'].format(scene_id=scene_id), + keys_to_int=True) + + # Keep GT poses only for the selected targets. + scene_gt_curr = {} + scene_gt_info_curr = {} + scene_gt_valid = {} + for im_id, im_targets in scene_targets.items(): + scene_gt_curr[im_id] = scene_gt[im_id] + + # Determine which GT poses are valid. + scene_gt_valid[im_id] = [] + im_gt_info = scene_gt_info[im_id] + for gt_id, gt in enumerate(scene_gt[im_id]): + is_target = gt['obj_id'] in im_targets.keys() + is_visib = im_gt_info[gt_id]['visib_fract'] >= p['visib_gt_min'] + scene_gt_valid[im_id].append(is_target and is_visib) + + # Load pre-calculated errors of the pose estimates w.r.t. the GT poses. + scene_errs_path = p['error_tpath'].format( + error_dir_path=error_dir_path, scene_id=scene_id) + scene_errs = inout.load_json(scene_errs_path, keys_to_int=True) + + # Normalize the errors by the object diameter. + if err_type in p['normalized_by_diameter']: + for err in scene_errs: + diameter = float(models_info[err['obj_id']]['diameter']) + for gt_id in err['errors'].keys(): + err['errors'][gt_id] = [e / diameter for e in err['errors'][gt_id]] + + # Normalize the errors by the image width. + if err_type in p['normalized_by_im_width']: + for err in scene_errs: + factor = 640.0 / float(dp_split['im_size'][0]) + for gt_id in err['errors'].keys(): + err['errors'][gt_id] = [factor * e for e in err['errors'][gt_id]] + + # Match the estimated poses to the ground-truth poses. + matches += pose_matching.match_poses_scene( + scene_id, scene_gt_curr, scene_gt_valid, scene_errs, + p['correct_th'][err_type], n_top) + + # Calculate the performance scores. + # ---------------------------------------------------------------------------- + # 6D object localization scores (SiSo if n_top = 1). + scores = score.calc_localization_scores( + dp_split['scene_ids'], dp_model['obj_ids'], matches, n_top) + + # Save scores. 
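+  # The scores and the matches are saved as JSON next to the error files
+  # (see out_scores_tpath and out_matches_tpath above).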
+ scores_path = p['out_scores_tpath'].format( + error_dir_path=error_dir_path, score_sign=score_sign) + inout.save_json(scores_path, scores) + + # Save matches. + matches_path = p['out_matches_tpath'].format( + error_dir_path=error_dir_path, score_sign=score_sign) + inout.save_json(matches_path, matches) + + time_total = time.time() - time_start + misc.log('Matching and score calculation took {}s.'.format(time_total)) + +misc.log('Done.') diff --git a/scripts/meshlab_scripts/remesh_for_eval_cell=0.25.mlx b/scripts/meshlab_scripts/remesh_for_eval_cell=0.25.mlx new file mode 100644 index 0000000..365ba90 --- /dev/null +++ b/scripts/meshlab_scripts/remesh_for_eval_cell=0.25.mlx @@ -0,0 +1,35 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/scripts/meshlab_scripts/remesh_for_eval_cell=0.5.mlx b/scripts/meshlab_scripts/remesh_for_eval_cell=0.5.mlx new file mode 100644 index 0000000..110538b --- /dev/null +++ b/scripts/meshlab_scripts/remesh_for_eval_cell=0.5.mlx @@ -0,0 +1,35 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/scripts/remesh_models_for_eval.py b/scripts/remesh_models_for_eval.py new file mode 100644 index 0000000..31e9029 --- /dev/null +++ b/scripts/remesh_models_for_eval.py @@ -0,0 +1,67 @@ +# Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) +# Center for Machine Perception, Czech Technical University in Prague + +"""'Uniformly' resamples and decimates 3D object models for evaluation. + +Note: Models of some T-LESS objects were processed by Blender (using the Remesh +modifier). +""" + +import os + +from bop_toolkit_lib import config +from bop_toolkit_lib import dataset_params +from bop_toolkit_lib import misc + + +# PARAMETERS. +################################################################################ +p = { + # See dataset_params.py for options. + 'dataset': 'lm', + + # Type of input object models. + # None = default model type. + 'model_in_type': None, + + # Type of output object models. + 'model_out_type': 'eval', + + # Folder containing the BOP datasets. + 'datasets_path': config.datasets_path, + + # Path to meshlabserver.exe (tested version: 1.3.3). + # On Windows: C:\Program Files\VCG\MeshLab133\meshlabserver.exe + 'meshlab_server_path': config.meshlab_server_path, + + # Path to scripts/meshlab_scripts/remesh_for_eval.mlx. + 'meshlab_script_path': os.path.join( + os.path.dirname(os.path.realpath(__file__)), 'meshlab_scripts', + r'remesh_for_eval_cell=0.25.mlx'), +} +################################################################################ + + +# Load dataset parameters. +dp_model_in = dataset_params.get_model_params( + p['datasets_path'], p['dataset'], p['model_in_type']) + +dp_model_out = dataset_params.get_model_params( + p['datasets_path'], p['dataset'], p['model_out_type']) + +# Attributes to save for the output models. +attrs_to_save = [] + +# Process models of all objects in the selected dataset. 
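+# Each model is passed through meshlabserver with the MeshLab script selected
+# above and the remeshed copy is saved to the path of the output model type.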
+for obj_id in dp_model_in['obj_ids']: + misc.log('\n\n\nProcessing model of object {}...\n'.format(obj_id)) + + model_in_path = dp_model_in['model_tpath'].format(obj_id=obj_id) + model_out_path = dp_model_out['model_tpath'].format(obj_id=obj_id) + + misc.ensure_dir(os.path.dirname(model_out_path)) + + misc.run_meshlab_script(p['meshlab_server_path'], p['meshlab_script_path'], + model_in_path, model_out_path, attrs_to_save) + +misc.log('Done.') diff --git a/scripts/render_train_imgs.py b/scripts/render_train_imgs.py new file mode 100644 index 0000000..7ff0e92 --- /dev/null +++ b/scripts/render_train_imgs.py @@ -0,0 +1,214 @@ +# Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) +# Center for Machine Perception, Czech Technical University in Prague + +"""Renders RGB-D images of an object model.""" + +import os +import cv2 + +from bop_toolkit_lib import config +from bop_toolkit_lib import dataset_params +from bop_toolkit_lib import inout +from bop_toolkit_lib import misc +from bop_toolkit_lib import renderer +from bop_toolkit_lib import view_sampler + + +# PARAMETERS. +################################################################################ +# See dataset_params.py for options. +dataset = 'tyol' + +# Radii of view spheres from which to render the objects. +if dataset == 'lm': + radii = [400] # There are only 3 occurrences under 400 mm. +elif dataset == 'tless': + radii = [650] +elif dataset == 'tudl': + radii = [850] +elif dataset == 'tyol': + radii = [500] +elif dataset == 'ruapc': + radii = [590] +elif dataset == 'icmi': + radii = [500] +elif dataset == 'icbin': + radii = [450] +else: + raise ValueError('Unknown dataset.') + +# Type of object models and camera. +model_type = None +cam_type = None +if dataset == 'tless': + model_type = 'reconst' + cam_type = 'primesense' + +# Objects to render ([] = all objects from the specified dataset). +obj_ids = [] + +# Minimum required number of views on the whole view sphere. The final number of +# views depends on the sampling method. +min_n_views = 1000 + +# Rendering parameters. +ambient_weight = 0.5 # Weight of ambient light [0, 1] +shading = 'phong' # 'flat', 'phong' + +# Type of the renderer. Options: 'cpp', 'python'. +renderer_type = 'python' + +# Super-sampling anti-aliasing (SSAA) - the RGB image is rendered at ssaa_fact +# times higher resolution and then down-sampled to the required resolution. +# Ref: https://github.com/vispy/vispy/wiki/Tech.-Antialiasing +ssaa_fact = 4 + +# Folder containing the BOP datasets. +datasets_path = config.datasets_path + +# Folder for the rendered images. +out_tpath = os.path.join(config.output_path, 'render_{dataset}') + +# Output path templates. +out_rgb_tpath =\ + os.path.join('{out_path}', '{obj_id:06d}', 'rgb', '{im_id:06d}.png') +out_depth_tpath =\ + os.path.join('{out_path}', '{obj_id:06d}', 'depth', '{im_id:06d}.png') +out_scene_camera_tpath =\ + os.path.join('{out_path}', '{obj_id:06d}', 'scene_camera.json') +out_scene_gt_tpath =\ + os.path.join('{out_path}', '{obj_id:06d}', 'scene_gt.json') +out_views_vis_tpath =\ + os.path.join('{out_path}', '{obj_id:06d}', 'views_radius={radius}.ply') +################################################################################ + + +out_path = out_tpath.format(dataset=dataset) +misc.ensure_dir(out_path) + +# Load dataset parameters. 
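+# The test split provides the azimuth/elevation ranges used for view sampling;
+# the camera parameters provide the intrinsics and depth scale for rendering.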
+dp_split_test = dataset_params.get_split_params(datasets_path, dataset, 'test') +dp_model = dataset_params.get_model_params(datasets_path, dataset, model_type) +dp_camera = dataset_params.get_camera_params(datasets_path, dataset, cam_type) + +if not obj_ids: + obj_ids = dp_model['obj_ids'] + +# Image size and K for the RGB image (potentially with SSAA). +im_size_rgb = [int(round(x * float(ssaa_fact))) for x in dp_camera['im_size']] +K_rgb = dp_camera['K'] * ssaa_fact + +# Intrinsic parameters for RGB rendering. +fx_rgb, fy_rgb, cx_rgb, cy_rgb =\ + K_rgb[0, 0], K_rgb[1, 1], K_rgb[0, 2], K_rgb[1, 2] + +# Intrinsic parameters for depth rendering. +K = dp_camera['K'] +fx_d, fy_d, cx_d, cy_d = K[0, 0], K[1, 1], K[0, 2], K[1, 2] + +# Create RGB and depth renderers (two are created because the RGB has a higher +# resolution if SSAA is used). +width_rgb, height_rgb = im_size_rgb[0], im_size_rgb[1] +ren_rgb = renderer.create_renderer( + width_rgb, height_rgb, renderer_type, mode='rgb', shading=shading) +ren_rgb.set_light_ambient_weight(ambient_weight) + +width_depth, height_depth, = dp_camera['im_size'][0], dp_camera['im_size'][1] +ren_depth = renderer.create_renderer( + width_depth, height_depth, renderer_type, mode='depth') + +# Render training images for all object models. +for obj_id in obj_ids: + + # Add the current object model to the renderer. + ren_rgb.add_object(obj_id, dp_model['model_tpath'].format(obj_id=obj_id)) + ren_depth.add_object(obj_id, dp_model['model_tpath'].format(obj_id=obj_id)) + + # Prepare output folders. + misc.ensure_dir(os.path.dirname(out_rgb_tpath.format( + out_path=out_path, obj_id=obj_id, im_id=0))) + misc.ensure_dir(os.path.dirname(out_depth_tpath.format( + out_path=out_path, obj_id=obj_id, im_id=0))) + + # Load model. + model_path = dp_model['model_tpath'].format(obj_id=obj_id) + model = inout.load_ply(model_path) + + # Load model texture. + if 'texture_file' in model: + model_texture_path =\ + os.path.join(os.path.dirname(model_path), model['texture_file']) + model_texture = inout.load_im(model_texture_path) + else: + model_texture = None + + scene_camera = {} + scene_gt = {} + im_id = 0 + for radius in radii: + # Sample viewpoints. + view_sampler_mode = 'hinterstoisser' # 'hinterstoisser' or 'fibonacci'. + views, views_level = view_sampler.sample_views( + min_n_views, radius, dp_split_test['azimuth_range'], + dp_split_test['elev_range'], view_sampler_mode) + + misc.log('Sampled views: ' + str(len(views))) + # out_views_vis_path = out_views_vis_tpath.format( + # out_path=out_path, obj_id=obj_id, radius=radius) + # view_sampler.save_vis(out_views_vis_path, views, views_level) + + # Render the object model from all views. + for view_id, view in enumerate(views): + if view_id % 10 == 0: + misc.log('Rendering - obj: {}, radius: {}, view: {}/{}'.format( + obj_id, radius, view_id, len(views))) + + # Rendering. + rgb = ren_rgb.render_object( + obj_id, view['R'], view['t'], fx_rgb, fy_rgb, cx_rgb, cy_rgb)['rgb'] + depth = ren_depth.render_object( + obj_id, view['R'], view['t'], fx_d, fy_d, cx_d, cy_d)['depth'] + + # Convert depth so it is in the same units as other images in the dataset. + depth /= dp_camera['depth_scale'] + + # The OpenCV function was used for rendering of the training images + # provided for the SIXD Challenge 2017. + rgb = cv2.resize(rgb, dp_camera['im_size'], interpolation=cv2.INTER_AREA) + # rgb = scipy.misc.imresize(rgb, par['cam']['im_size'][::-1], 'bicubic') + + # Save the rendered images. 
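+      # The depth image is saved in the same units as the dataset's depth
+      # images (see the division by depth_scale above).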
+      out_rgb_path = out_rgb_tpath.format(
+        out_path=out_path, obj_id=obj_id, im_id=im_id)
+      inout.save_im(out_rgb_path, rgb)
+      out_depth_path = out_depth_tpath.format(
+        out_path=out_path, obj_id=obj_id, im_id=im_id)
+      inout.save_depth(out_depth_path, depth)
+
+      # Get 2D bounding box of the object model at the ground truth pose.
+      # ys, xs = np.nonzero(depth > 0)
+      # obj_bb = misc.calc_2d_bbox(xs, ys, dp_camera['im_size'])
+
+      scene_camera[im_id] = {
+        'cam_K': dp_camera['K'].flatten().tolist(),
+        'depth_scale': dp_camera['depth_scale'],
+        'view_level': int(views_level[view_id])
+      }
+
+      scene_gt[im_id] = [{
+        'cam_R_m2c': view['R'].flatten().tolist(),
+        'cam_t_m2c': view['t'].flatten().tolist(),
+        'obj_id': int(obj_id)
+      }]
+
+      im_id += 1
+
+  # Remove the current object model from the renderer.
+  ren_rgb.remove_object(obj_id)
+  ren_depth.remove_object(obj_id)
+
+  # Save metadata.
+  inout.save_scene_camera(out_scene_camera_tpath.format(
+    out_path=out_path, obj_id=obj_id), scene_camera)
+  inout.save_scene_gt(out_scene_gt_tpath.format(
+    out_path=out_path, obj_id=obj_id), scene_gt)
diff --git a/scripts/vis_est_poses.py b/scripts/vis_est_poses.py
new file mode 100644
index 0000000..ff9a4dd
--- /dev/null
+++ b/scripts/vis_est_poses.py
@@ -0,0 +1,235 @@
+# Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz)
+# Center for Machine Perception, Czech Technical University in Prague
+
+"""Visualizes object models in pose estimates saved in the BOP format."""
+
+import os
+import numpy as np
+import itertools
+
+from bop_toolkit_lib import config
+from bop_toolkit_lib import dataset_params
+from bop_toolkit_lib import inout
+from bop_toolkit_lib import misc
+from bop_toolkit_lib import renderer
+from bop_toolkit_lib import visualization
+
+
+# PARAMETERS.
+################################################################################
+p = {
+  # Top N pose estimates (with the highest score) to be visualized for each
+  # object in each image.
+  'n_top': 1,  # 0 = all estimates, -1 = given by the number of GT poses.
+
+  # True = one visualization for each (im_id, obj_id), False = one per im_id.
+  'vis_per_obj_id': True,
+
+  # Indicates whether to render RGB images.
+  'vis_rgb': True,
+
+  # Indicates whether to resolve visibility in the rendered RGB images (using
+  # depth renderings). If True, only the part of object surface, which is not
+  # occluded by any other modeled object, is visible. If False, RGB renderings
+  # of individual objects are blended together.
+  'vis_rgb_resolve_visib': True,
+
+  # Indicates whether to save images of depth differences.
+  'vis_depth_diff': False,
+
+  # Whether to use the original model color.
+  'vis_orig_color': False,
+
+  # Type of the renderer (used for the VSD pose error function).
+  'renderer_type': 'python',  # Options: 'cpp', 'python'.
+
+  # Names of files with pose estimates to visualize (assumed to be stored in
+  # folder config.results_path). See docs/bop_challenge_2019.md for a
+  # description of the format. Example results can be found at:
+  # http://ptak.felk.cvut.cz/6DB/public/bop_sample_results/bop_challenge_2019/
+  'result_filenames': [
+    '/path/to/csv/with/results',
+  ],
+
+  # Folder containing the BOP datasets.
+  'datasets_path': config.datasets_path,
+
+  # Folder for output visualisations.
+  'vis_path': os.path.join(config.output_path, 'vis_est_poses'),
+
+  # Path templates for output images.
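+  # The placeholders are filled in for each result, scene and visualized image
+  # (see below).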
+ 'vis_rgb_tpath': os.path.join( + '{vis_path}', '{result_name}', '{scene_id:06d}', '{vis_name}.jpg'), + 'vis_depth_diff_tpath': os.path.join( + '{vis_path}', '{result_name}', '{scene_id:06d}', + '{vis_name}_depth_diff.jpg'), +} +################################################################################ + + +# Load colors. +colors_path = os.path.join( + os.path.dirname(visualization.__file__), 'colors.json') +colors = inout.load_json(colors_path) + +for result_fname in p['result_filenames']: + misc.log('Processing: ' + result_fname) + + # Parse info about the method and the dataset from the filename. + result_name = os.path.splitext(os.path.basename(result_fname))[0] + result_info = result_name.split('_') + method = result_info[0] + dataset_info = result_info[1].split('-') + dataset = dataset_info[0] + split = dataset_info[1] + split_type = dataset_info[2] if len(dataset_info) > 2 else None + + # Load dataset parameters. + dp_split = dataset_params.get_split_params( + p['datasets_path'], dataset, split, split_type) + + model_type = 'eval' + dp_model = dataset_params.get_model_params( + p['datasets_path'], dataset, model_type) + + # Rendering mode. + renderer_modalities = [] + if p['vis_rgb']: + renderer_modalities.append('rgb') + if p['vis_depth_diff'] or (p['vis_rgb'] and p['vis_rgb_resolve_visib']): + renderer_modalities.append('depth') + renderer_mode = '+'.join(renderer_modalities) + + # Create a renderer. + width, height = dp_split['im_size'] + ren = renderer.create_renderer( + width, height, p['renderer_type'], mode=renderer_mode) + + # Load object models. + models = {} + for obj_id in dp_model['obj_ids']: + misc.log('Loading 3D model of object {}...'.format(obj_id)) + model_path = dp_model['model_tpath'].format(obj_id=obj_id) + model_color = None + if not p['vis_orig_color']: + model_color = tuple(colors[(obj_id - 1) % len(colors)]) + ren.add_object(obj_id, model_path, surf_color=model_color) + + # Load pose estimates. + misc.log('Loading pose estimates...') + ests = inout.load_bop_results( + os.path.join(config.results_path, result_fname)) + + # Organize the pose estimates by scene, image and object. + misc.log('Organizing pose estimates...') + ests_org = {} + for est in ests: + ests_org.setdefault(est['scene_id'], {}).setdefault( + est['im_id'], {}).setdefault(est['obj_id'], []).append(est) + + for scene_id, scene_ests in ests_org.items(): + + # Load info and ground-truth poses for the current scene. + scene_camera = inout.load_scene_camera( + dp_split['scene_camera_tpath'].format(scene_id=scene_id)) + scene_gt = inout.load_scene_gt( + dp_split['scene_gt_tpath'].format(scene_id=scene_id)) + + for im_ind, (im_id, im_ests) in enumerate(scene_ests.items()): + + if im_ind % 10 == 0: + split_type_str = ' - ' + split_type if split_type is not None else '' + misc.log( + 'Visualizing pose estimates - method: {}, dataset: {}{}, scene: {}, ' + 'im: {}'.format(method, dataset, split_type_str, scene_id, im_id)) + + # Intrinsic camera matrix. + K = scene_camera[im_id]['cam_K'] + + im_ests_vis = [] + im_ests_vis_obj_ids = [] + for obj_id, obj_ests in im_ests.items(): + + # Sort the estimates by score (in descending order). + obj_ests_sorted = sorted( + obj_ests, key=lambda est: est['score'], reverse=True) + + # Select the number of top estimated poses to visualize. + if p['n_top'] == 0: # All estimates are considered. + n_top_curr = None + elif p['n_top'] == -1: # Given by the number of GT poses. 
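+          # Count the GT poses of this object in the current image and
+          # visualize at most that many estimates.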
+ n_gt = sum([gt['obj_id'] == obj_id for gt in scene_gt[im_id]]) + n_top_curr = n_gt + else: # Specified by the parameter n_top. + n_top_curr = p['n_top'] + obj_ests_sorted = obj_ests_sorted[slice(0, n_top_curr)] + + # Get list of poses to visualize. + for est in obj_ests_sorted: + est['obj_id'] = obj_id + + # Text info to write on the image at the pose estimate. + if p['vis_per_obj_id']: + est['text_info'] = [ + {'name': '', 'val': est['score'], 'fmt': ':.2f'} + ] + else: + val = '{}:{:.2f}'.format(obj_id, est['score']) + est['text_info'] = [{'name': '', 'val': val, 'fmt': ''}] + + im_ests_vis.append(obj_ests_sorted) + im_ests_vis_obj_ids.append(obj_id) + + # Join the per-object estimates if only one visualization is to be made. + if not p['vis_per_obj_id']: + im_ests_vis = [list(itertools.chain.from_iterable(im_ests_vis))] + + for ests_vis_id, ests_vis in enumerate(im_ests_vis): + + # Load the color and depth images and prepare images for rendering. + rgb = None + if p['vis_rgb']: + if 'rgb' in dp_split['im_modalities']: + rgb = inout.load_im(dp_split['rgb_tpath'].format( + scene_id=scene_id, im_id=im_id)) + elif 'gray' in dp_split['im_modalities']: + gray = inout.load_im(dp_split['gray_tpath'].format( + scene_id=scene_id, im_id=im_id)) + rgb = np.dstack([gray, gray, gray]) + else: + raise ValueError('RGB nor gray images are available.') + + depth = None + if p['vis_depth_diff'] or (p['vis_rgb'] and p['vis_rgb_resolve_visib']): + depth = inout.load_depth(dp_split['depth_tpath'].format( + scene_id=scene_id, im_id=im_id)) + depth *= scene_camera[im_id]['depth_scale'] # Convert to [mm]. + + # Visualization name. + if p['vis_per_obj_id']: + vis_name = '{im_id:06d}_{obj_id:06d}'.format( + im_id=im_id, obj_id=im_ests_vis_obj_ids[ests_vis_id]) + else: + vis_name = '{im_id:06d}'.format(im_id=im_id) + + # Path to the output RGB visualization. + vis_rgb_path = None + if p['vis_rgb']: + vis_rgb_path = p['vis_rgb_tpath'].format( + vis_path=p['vis_path'], result_name=result_name, scene_id=scene_id, + vis_name=vis_name) + + # Path to the output depth difference visualization. + vis_depth_diff_path = None + if p['vis_depth_diff']: + vis_depth_diff_path = p['vis_depth_diff_tpath'].format( + vis_path=p['vis_path'], result_name=result_name, scene_id=scene_id, + vis_name=vis_name) + + # Visualization. + visualization.vis_object_poses( + poses=ests_vis, K=K, renderer=ren, rgb=rgb, depth=depth, + vis_rgb_path=vis_rgb_path, vis_depth_diff_path=vis_depth_diff_path, + vis_rgb_resolve_visib=p['vis_rgb_resolve_visib']) + +misc.log('Done.') diff --git a/scripts/vis_gt_poses.py b/scripts/vis_gt_poses.py new file mode 100644 index 0000000..6d6e069 --- /dev/null +++ b/scripts/vis_gt_poses.py @@ -0,0 +1,209 @@ +# Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) +# Center for Machine Perception, Czech Technical University in Prague + +"""Visualizes object models in the ground-truth poses.""" + +import os +import numpy as np + +from bop_toolkit_lib import config +from bop_toolkit_lib import dataset_params +from bop_toolkit_lib import inout +from bop_toolkit_lib import misc +from bop_toolkit_lib import renderer +from bop_toolkit_lib import visualization + + +# PARAMETERS. +################################################################################ +p = { + # See dataset_params.py for options. + 'dataset': 'lm', + + # Dataset split. Options: 'train', 'val', 'test'. + 'dataset_split': 'test', + + # Dataset split type. None = default. See dataset_params.py for options. 
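+  # (E.g. 'primesense' for the T-LESS test split.)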
+ 'dataset_split_type': None, + + # File with a list of estimation targets used to determine the set of images + # for which the GT poses will be visualized. The file is assumed to be stored + # in the dataset folder. None = all images. + # 'targets_filename': 'test_targets_bop19.json', + 'targets_filename': None, + + # Select ID's of scenes, images and GT poses to be processed. + # Empty list [] means that all ID's will be used. + 'scene_ids': [], + 'im_ids': [], + 'gt_ids': [], + + # Indicates whether to render RGB images. + 'vis_rgb': True, + + # Indicates whether to resolve visibility in the rendered RGB images (using + # depth renderings). If True, only the part of object surface, which is not + # occluded by any other modeled object, is visible. If False, RGB renderings + # of individual objects are blended together. + 'vis_rgb_resolve_visib': True, + + # Indicates whether to save images of depth differences. + 'vis_depth_diff': False, + + # Whether to use the original model color. + 'vis_orig_color': False, + + # Type of the renderer (used for the VSD pose error function). + 'renderer_type': 'python', # Options: 'cpp', 'python'. + + # Folder containing the BOP datasets. + 'datasets_path': config.datasets_path, + + # Folder for output visualisations. + 'vis_path': os.path.join(config.output_path, 'vis_gt_poses'), + + # Path templates for output images. + 'vis_rgb_tpath': os.path.join( + '{vis_path}', '{dataset}', '{split}', '{scene_id:06d}', '{im_id:06d}.jpg'), + 'vis_depth_diff_tpath': os.path.join( + '{vis_path}', '{dataset}', '{split}', '{scene_id:06d}', + '{im_id:06d}_depth_diff.jpg'), +} +################################################################################ + + +# Load dataset parameters. +dp_split = dataset_params.get_split_params( + p['datasets_path'], p['dataset'], p['dataset_split'], p['dataset_split_type']) + +model_type = 'eval' # None = default. +dp_model = dataset_params.get_model_params( + p['datasets_path'], p['dataset'], model_type) + +# Load colors. +colors_path = os.path.join( + os.path.dirname(visualization.__file__), 'colors.json') +colors = inout.load_json(colors_path) + +# Subset of images for which the ground-truth poses will be rendered. +if p['targets_filename'] is not None: + targets = inout.load_json( + os.path.join(dp_split['base_path'], p['targets_filename'])) + scene_im_ids = {} + for target in targets: + scene_im_ids.setdefault( + target['scene_id'], set()).add(target['im_id']) +else: + scene_im_ids = None + +# List of considered scenes. +scene_ids_curr = dp_split['scene_ids'] +if p['scene_ids']: + scene_ids_curr = set(scene_ids_curr).intersection(p['scene_ids']) + +# Rendering mode. +renderer_modalities = [] +if p['vis_rgb']: + renderer_modalities.append('rgb') +if p['vis_depth_diff'] or (p['vis_rgb'] and p['vis_rgb_resolve_visib']): + renderer_modalities.append('depth') +renderer_mode = '+'.join(renderer_modalities) + +# Create a renderer. +width, height = dp_split['im_size'] +ren = renderer.create_renderer( + width, height, p['renderer_type'], mode=renderer_mode, shading='flat') + +# Load object models. +models = {} +for obj_id in dp_model['obj_ids']: + misc.log('Loading 3D model of object {}...'.format(obj_id)) + model_path = dp_model['model_tpath'].format(obj_id=obj_id) + model_color = None + if not p['vis_orig_color']: + model_color = tuple(colors[(obj_id - 1) % len(colors)]) + ren.add_object(obj_id, model_path, surf_color=model_color) + +for scene_id in scene_ids_curr: + + # Load scene info and ground-truth poses. 
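+  # scene_camera provides the per-image intrinsics and depth scale, scene_gt
+  # the ground-truth object poses.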
+ scene_camera = inout.load_scene_camera( + dp_split['scene_camera_tpath'].format(scene_id=scene_id)) + scene_gt = inout.load_scene_gt( + dp_split['scene_gt_tpath'].format(scene_id=scene_id)) + + # List of considered images. + if scene_im_ids is not None: + im_ids = scene_im_ids[scene_id] + else: + im_ids = sorted(scene_gt.keys()) + if p['im_ids']: + im_ids = set(im_ids).intersection(p['im_ids']) + + # Render the object models in the ground-truth poses in the selected images. + for im_counter, im_id in enumerate(im_ids): + if im_counter % 10 == 0: + misc.log( + 'Visualizing GT poses - dataset: {}, scene: {}, im: {}/{}'.format( + p['dataset'], scene_id, im_counter, len(im_ids))) + + K = scene_camera[im_id]['cam_K'] + + # List of considered ground-truth poses. + gt_ids_curr = range(len(scene_gt[im_id])) + if p['gt_ids']: + gt_ids_curr = set(gt_ids_curr).intersection(p['gt_ids']) + + # Collect the ground-truth poses. + gt_poses = [] + for gt_id in gt_ids_curr: + gt = scene_gt[im_id][gt_id] + gt_poses.append({ + 'obj_id': gt['obj_id'], + 'R': gt['cam_R_m2c'], + 't': gt['cam_t_m2c'], + 'text_info': [ + {'name': '', 'val': '{}:{}'.format(gt['obj_id'], gt_id), 'fmt': ''} + ] + }) + + # Load the color and depth images and prepare images for rendering. + rgb = None + if p['vis_rgb']: + if 'rgb' in dp_split['im_modalities']: + rgb = inout.load_im(dp_split['rgb_tpath'].format( + scene_id=scene_id, im_id=im_id)) + elif 'gray' in dp_split['im_modalities']: + gray = inout.load_im(dp_split['gray_tpath'].format( + scene_id=scene_id, im_id=im_id)) + rgb = np.dstack([gray, gray, gray]) + else: + raise ValueError('RGB nor gray images are available.') + + depth = None + if p['vis_depth_diff'] or (p['vis_rgb'] and p['vis_rgb_resolve_visib']): + depth = inout.load_depth(dp_split['depth_tpath'].format( + scene_id=scene_id, im_id=im_id)) + depth *= scene_camera[im_id]['depth_scale'] # Convert to [mm]. + + # Path to the output RGB visualization. + vis_rgb_path = None + if p['vis_rgb']: + vis_rgb_path = p['vis_rgb_tpath'].format( + vis_path=p['vis_path'], dataset=p['dataset'], split=p['dataset_split'], + scene_id=scene_id, im_id=im_id) + + # Path to the output depth difference visualization. + vis_depth_diff_path = None + if p['vis_depth_diff']: + vis_depth_diff_path = p['vis_depth_diff_tpath'].format( + vis_path=p['vis_path'], dataset=p['dataset'], split=p['dataset_split'], + scene_id=scene_id, im_id=im_id) + + # Visualization. + visualization.vis_object_poses( + poses=gt_poses, K=K, renderer=ren, rgb=rgb, depth=depth, + vis_rgb_path=vis_rgb_path, vis_depth_diff_path=vis_depth_diff_path, + vis_rgb_resolve_visib=p['vis_rgb_resolve_visib']) + +misc.log('Done.') diff --git a/scripts/vis_object_symmetries.py b/scripts/vis_object_symmetries.py new file mode 100644 index 0000000..7131221 --- /dev/null +++ b/scripts/vis_object_symmetries.py @@ -0,0 +1,99 @@ +# Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) +# Center for Machine Perception, Czech Technical University in Prague + +"""Visualizes object models under all identified symmetry transformations.""" + +import os +import numpy as np + +from bop_toolkit_lib import config +from bop_toolkit_lib import dataset_params +from bop_toolkit_lib import inout +from bop_toolkit_lib import misc +from bop_toolkit_lib import renderer +from bop_toolkit_lib import transform as tr + + +# PARAMETERS. +################################################################################ +p = { + # See dataset_params.py for options. 
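+  # (E.g. 'lm', 'tless', 'tudl', 'tyol', 'ruapc', 'icmi' or 'icbin'.)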
+ 'dataset': 'lm', + + # Type of the renderer (used for the VSD pose error function). + 'renderer_type': 'python', # Options: 'cpp', 'python'. + + # See misc.get_symmetry_transformations(). + 'max_sym_disc_step': 0.01, + + 'views': [ + { + 'R': tr.rotation_matrix(0.5 * np.pi, [1, 0, 0]).dot( + tr.rotation_matrix(-0.5 * np.pi, [0, 0, 1])).dot( + tr.rotation_matrix(0.1 * np.pi, [0, 1, 0]))[:3, :3], + 't': np.array([[0, 0, 500]]).T + } + ], + + # Folder containing the BOP datasets. + 'datasets_path': config.datasets_path, + + # Folder for output visualisations. + 'vis_path': os.path.join(config.output_path, 'vis_object_symmetries'), + + # Path templates for output images. + 'vis_rgb_tpath': os.path.join( + '{vis_path}', '{dataset}', '{obj_id:06d}', + '{view_id:06d}_{pose_id:06d}.jpg'), +} +################################################################################ + + +# Load dataset parameters. +model_type = None # None = default. +if p['dataset'] == 'tless': + model_type = 'cad' +dp_model = dataset_params.get_model_params( + p['datasets_path'], p['dataset'], model_type) +dp_camera = dataset_params.get_camera_params( + p['datasets_path'], p['dataset']) + +K = dp_camera['K'] +fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2] + +# Create a renderer. +width, height = dp_camera['im_size'] +ren = renderer.create_renderer( + width, height, p['renderer_type'], mode='rgb', shading='flat') + +# Load meta info about the models (including symmetries). +models_info = inout.load_json(dp_model['models_info_path'], keys_to_int=True) + + +for obj_id in dp_model['obj_ids']: + + # Load object model. + misc.log('Loading 3D model of object {}...'.format(obj_id)) + model_path = dp_model['model_tpath'].format(obj_id=obj_id) + ren.add_object(obj_id, model_path) + + poses = misc.get_symmetry_transformations( + models_info[obj_id], p['max_sym_disc_step']) + + for pose_id, pose in enumerate(poses): + + for view_id, view in enumerate(p['views']): + + R = view['R'].dot(pose['R']) + t = view['R'].dot(pose['t']) + view['t'] + + vis_rgb = ren.render_object(obj_id, R, t, fx, fy, cx, cy)['rgb'] + + # Path to the output RGB visualization. + vis_rgb_path = p['vis_rgb_tpath'].format( + vis_path=p['vis_path'], dataset=p['dataset'], obj_id=obj_id, + view_id=view_id, pose_id=pose_id) + misc.ensure_dir(os.path.dirname(vis_rgb_path)) + inout.save_im(vis_rgb_path, vis_rgb) + +misc.log('Done.')
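+
+# Example invocation (assuming that bop_toolkit_lib is on PYTHONPATH and that
+# the paths in bop_toolkit_lib/config.py are set):
+#   python scripts/vis_object_symmetries.py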