From 48057c4e7a903c80dfd1c17f84eba3de6d9e3fad Mon Sep 17 00:00:00 2001 From: Vanessasaurus <814322+vsoch@users.noreply.github.com> Date: Mon, 21 Nov 2022 22:59:29 -0700 Subject: [PATCH] adding linting/formatting with pre-commit (#239) * adding linting/formatting with pre-commit Signed-off-by: vsoch --- .github/dev-requirements.txt | 4 + .github/workflows/codespell.yaml | 2 +- .github/workflows/docs.yaml | 1 - .github/workflows/main.yaml | 8 +- .gitignore | 1 - .pre-commit-config.yaml | 31 ++++++ CHANGELOG.md | 25 ++--- CONTRIBUTING.md | 6 +- deid/config/utils.py | 8 +- deid/data/__init__.py | 8 +- deid/data/deid.dicom | 8 +- deid/data/deid.dicom.xray.chest | 2 +- deid/dicom/__init__.py | 2 +- deid/dicom/config.json | 6 +- deid/dicom/groups.py | 4 - deid/dicom/header.py | 4 - deid/dicom/pixels/clean.py | 5 +- deid/dicom/pixels/detect.py | 8 +- deid/dicom/tags.py | 2 +- deid/dicom/utils.py | 2 +- deid/logger/message.py | 3 - deid/main/__init__.py | 2 +- deid/tests/Xtest_dicom_header.py | 3 - .../filter_multiple_first_filter_match.dicom | 2 +- .../filter_multiple_rule_innerop_false.dicom | 2 +- .../filter_multiple_rule_innerop_true.dicom | 2 +- .../filter_multiple_second_filter_match.dicom | 2 +- .../filter_multiple_two_filter_match.dicom | 2 +- .../filter_multiple_zero_filter_match.dicom | 2 +- .../resources/filter_single_rule_false.dicom | 2 +- .../filter_single_rule_innerop_false.dicom | 2 +- .../filter_single_rule_innerop_true.dicom | 2 +- .../resources/filter_single_rule_true.dicom | 2 +- deid/tests/resources/keepcoordinates.dicom | 2 +- .../resources/keepcoordinates_from.dicom | 4 +- .../resources/keepcoordinates_noaction.dicom | 2 +- deid/tests/resources/remove_all.dicom | 2 +- deid/tests/resources/remove_coordinates.dicom | 2 +- .../remove_coordinates_multiple.dicom | 2 +- .../remove_coordinates_multiple_filters.dicom | 2 +- .../resources/remove_coordinates_us.dicom | 2 +- .../resources/remove_coordinates_us_all.dicom | 2 +- deid/tests/test_clean.py | 4 - 
deid/tests/test_clean_pixel_dimensions.py | 3 - deid/tests/test_data.py | 4 - deid/tests/test_deid_recipe.py | 3 - deid/tests/test_dicom_fields.py | 4 - deid/tests/test_dicom_funcs.py | 3 - deid/tests/test_dicom_groups.py | 3 - deid/tests/test_dicom_utils.py | 3 - deid/tests/test_file_meta.py | 4 - deid/tests/test_filter_detect.py | 4 - deid/tests/test_replace_identifiers.py | 6 +- deid/tests/test_utils.py | 4 - deid/tests/test_utils_files.py | 4 - deid/utils/actions.py | 2 +- deid/version.py | 2 +- docs/README.md | 2 +- docs/_data/links.yml | 3 +- docs/_docs/contributing/code.md | 2 +- docs/_docs/contributing/docs.md | 4 +- docs/_docs/development/image-format.md | 44 ++++----- docs/_docs/development/index.md | 11 ++- docs/_docs/development/linting-format.md | 28 ++++++ docs/_docs/examples/client.md | 22 ++--- docs/_docs/examples/deid-dataset.md | 16 ++-- docs/_docs/examples/func-replace.md | 24 ++--- docs/_docs/examples/func-sequence-replace.md | 28 +++--- docs/_docs/examples/header-expanders.md | 20 ++-- docs/_docs/examples/recipe.md | 76 +++++++-------- docs/_docs/getting-started/dicom-config.md | 8 +- docs/_docs/getting-started/dicom-get.md | 32 +++---- docs/_docs/getting-started/dicom-loading.md | 32 +++---- docs/_docs/getting-started/dicom-pixels.md | 94 +++++++++---------- docs/_docs/getting-started/dicom-put.md | 26 ++--- docs/_docs/getting-started/index.md | 2 +- docs/_docs/install/docker.md | 8 +- docs/_docs/install/local.md | 2 +- docs/_docs/user-docs/client.md | 84 ++++++++--------- docs/_docs/user-docs/index.md | 2 +- docs/_docs/user-docs/recipe-filters.md | 50 +++++----- docs/_docs/user-docs/recipe-funcs.md | 6 +- docs/_docs/user-docs/recipe-groups.md | 8 +- docs/_docs/user-docs/recipe-headers.md | 2 +- docs/_docs/user-docs/recipe-labels.md | 12 +-- docs/_docs/user-docs/tags.md | 10 +- docs/api_docs/conf.py | 5 +- docs/api_docs/index.rst | 2 +- docs/assets/css/deid.css | 6 +- docs/assets/img/emblem.svg | 2 +- docs/assets/js/lunr.min.js | 2 +- 
docs/assets/js/search.js | 2 +- docs/pages/index.md | 8 +- examples/README.md | 4 +- examples/dicom/README.md | 4 +- examples/dicom/dicom-extract/README.md | 2 +- .../dicom/dicom-extract/create-dicom-csv.py | 9 +- .../header-manipulation/file-meta/example.py | 2 +- .../header-manipulation/func-replacement.py | 40 ++++---- .../func-sequence-replace/example.py | 2 +- examples/dicom/pixels/run-cleaner-client.py | 8 +- examples/dicom/pixels/run-inspect-pixels.py | 3 +- examples/dicom/recipe/deid-dicom-example.py | 21 ++--- pyproject.toml | 7 ++ setup.cfg | 13 +++ setup.py | 5 +- 106 files changed, 523 insertions(+), 520 deletions(-) create mode 100644 .github/dev-requirements.txt create mode 100644 .pre-commit-config.yaml create mode 100644 docs/_docs/development/linting-format.md create mode 100644 pyproject.toml diff --git a/.github/dev-requirements.txt b/.github/dev-requirements.txt new file mode 100644 index 00000000..0b29a889 --- /dev/null +++ b/.github/dev-requirements.txt @@ -0,0 +1,4 @@ +pre-commit +black +isort +flake8 diff --git a/.github/workflows/codespell.yaml b/.github/workflows/codespell.yaml index e29d891d..b58b986a 100644 --- a/.github/workflows/codespell.yaml +++ b/.github/workflows/codespell.yaml @@ -14,5 +14,5 @@ jobs: steps: - uses: actions/checkout@v3 - uses: crate-ci/typos@592b36d23c62cb378f6097a292bc902ee73f93ef # version 1.0.4 - with: + with: files: ./deid ./docs/_docs ./docs/README.md ./docs/pages ./examples diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index c5a6a847..b6adc89b 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -47,4 +47,3 @@ jobs: git commit -a -m "Adding changed documentation files" git push origin gh-pages || echo "up to date" fi - diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index ac13d6e9..3f394afc 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -16,12 +16,12 @@ jobs: - name: Setup black environment run: conda 
create --quiet --name black black - - name: Check formatting + - name: Lint and format Python code run: | export PATH="/usr/share/miniconda/bin:$PATH" source activate black - pip install black --upgrade - black --check --verbose deid + pip install -r .github/dev-requirements.txt + pre-commit run --all-files testing: runs-on: ubuntu-latest @@ -29,7 +29,7 @@ jobs: - uses: actions/checkout@v3 - name: Setup conda environment - run: | + run: | conda create --quiet --name testing export PATH="/usr/share/miniconda/bin:$PATH" source activate testing diff --git a/.gitignore b/.gitignore index 4de0ca2c..133d5dbe 100644 --- a/.gitignore +++ b/.gitignore @@ -42,4 +42,3 @@ Temporary Items # PyCharm .idea/ - diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..3f140ca3 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,31 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.3.0 + hooks: + - id: check-added-large-files + - id: check-case-conflict + - id: check-docstring-first + - id: end-of-file-fixer + - id: trailing-whitespace + - id: mixed-line-ending + + - repo: local + hooks: + - id: black + name: black + language: python + types: [python] + entry: black + + - id: isort + name: isort + args: [--filter-files] + language: python + types: [python] + entry: isort + + - id: flake8 + name: flake8 + language: python + types: [python] + entry: flake8 diff --git a/CHANGELOG.md b/CHANGELOG.md index ec05dda4..db7af980 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,32 +15,33 @@ Referenced versions in headers are tagged on Github, in parentheses are for pypi ## [vxx](https://github.com/pydicom/deid/tree/master) (master) +- pre-commit for linting and formatting (0.3.1) - Add `ctpcoordinates` and `ctpkeepcoordinates` to handle different formats (0.3.0) -- Minimum Python required is 3.7, numpy 1.20 + - Minimum Python required is 3.7, numpy 1.20 - Remove unecessary typing - adds bugs (0.2.37) - Provide data as 
an external package (0.2.36) - Restore expand_sequences to get_identifiers (0.2.35) - Add function to clean datasets without `DicomCleaner` [#223](https://github.com/pydicom/deid/pull/223) (0.2.34) - add select:vr:XX field expander to select elements by VR (0.2.33) -- rename group:XXXX field expander to select:group:XXXX + - rename group:XXXX field expander to select:group:XXXX - add group:XXXX field expander to select all elements with a specified DICOM tag group (0.2.32) - custom class example for using dicom.Dataset, not requiring on client init [#211](https://github.com/pydicom/deid/pull/211) (0.2.31) - adding support for deid provided functions [#207](https://github.com/pydicom/deid/issues/207) (0.2.3) - update CTP deid.dicom up until [this commit](https://github.com/johnperry/CTP/commit/345b05b157c046532e8791a63ababbf6d0dba59b) (0.2.29) - various LGTM alert fixes [#186](https://github.com/pydicom/deid/pull/186) (0.0.28) -- `REPLACE/JITTER` actions have now higher priority than `REMOVE`, allowing to whitelist fields from `REMOVE ALL/Field` [#197](https://github.com/pydicom/deid/issues/197) -- `ADD/KEEP` actions have now higher priority than `REMOVE`, allowing to whitelist fields from `REMOVE ALL/Field` [#197](https://github.com/pydicom/deid/issues/197) -- updated pydicom dependency from 2.1.1 to 2.2.2 [#194](https://github.com/pydicom/deid/issues/194) + - `REPLACE/JITTER` actions have now higher priority than `REMOVE`, allowing to whitelist fields from `REMOVE ALL/Field` [#197](https://github.com/pydicom/deid/issues/197) + - `ADD/KEEP` actions have now higher priority than `REMOVE`, allowing to whitelist fields from `REMOVE ALL/Field` [#197](https://github.com/pydicom/deid/issues/197) + - updated pydicom dependency from 2.1.1 to 2.2.2 [#194](https://github.com/pydicom/deid/issues/194) - bug fix for exception when attempting to jitter DA/DT which cannot be jittered (space) [#189] () (0.2.27) - adding support to manipulate file meta 
[#183](https://github.com/pydicom/deid/issues/183) (0.2.26) - updated pydicom dependency from 1.3.0 to 2.1.1 [#171](https://github.com/pydicom/deid/issues/171) (0.2.25) -- bug fix for multivalued fields in %values lists [#174](https://github.com/pydicom/deid/issues/174) -- allowing other VR types for jitter [#175](https://github.com/pydicom/deid/issues/175) -- ensuring that an add/replace of an existing value is also updated in fields [#173](https://github.com/pydicom/deid/issues/173) + - bug fix for multivalued fields in %values lists [#174](https://github.com/pydicom/deid/issues/174) + - allowing other VR types for jitter [#175](https://github.com/pydicom/deid/issues/175) + - ensuring that an add/replace of an existing value is also updated in fields [#173](https://github.com/pydicom/deid/issues/173) - change to correct issue with deidentifying RGB images [#165](https://github.com/pydicom/deid/issues/165) (0.2.24) - removing verbosity of debug logger (0.2.23) - changing iteration technique through fields to properly add nested uids [#153](https://github.com/pydicom/deid/issues/153) (0.2.22) -- change to return results from detect when recipe does not contain filters [#155](https://github.com/pydicom/deid/issues/155) + - change to return results from detect when recipe does not contain filters [#155](https://github.com/pydicom/deid/issues/155) - fix to correct bug in detect [#142](https://github.com/pydicom/deid/issues/142) (0.2.21) - fixes to detect and clean to better represent keep/coordinates (0.2.20) - modify default VR for added tags [#146](https://github.com/pydicom/deid/issues/146), bug with private tags in %fields section [#147](https://github.com/pydicom/deid/issues/147) (0.2.19) @@ -78,14 +79,14 @@ Referenced versions in headers are tagged on Github, in parentheses are for pypi - need to clean up temporary directory (mkdtemp), issue #68 (0.1.18) - fixing issue #65, save for compressed data (0.1.17) - matplotlib must be less than or equal to 2.1.2 for 
install (0.1.16) -- fixing bug with clean coordinate flipping rectangle + - fixing bug with clean coordinate flipping rectangle - Fixing bug with saving self.cleaned (0.1.15) - Allowing for datasets to be passed in functions (not necessary for files) (0.1.14) - index should be full path in header.py (0.1.13) - pydicom bumped to install latest (1.0.2) (0.1.12) - ensuring that ids for images are full paths (0.1.11) -- addition of the DeidRecipe class to better interact with and combine deid recipe files. -- the get_files function now returns a generator instead of a list. + - addition of the DeidRecipe class to better interact with and combine deid recipe files. + - the get_files function now returns a generator instead of a list. ## [0.1.1](https://pypi.python.org/packages/28/26/ee80e7f1c3f65fae1c901497bb2388701158f0c96e0d633ab301abeaa478/deid-0.1.1.tar.gz#md5=39df7efb03e5d3b63308016742062a43) (0.1.1) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 816c8e8e..d206a6ba 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -24,8 +24,8 @@ This code is licensed under the MIT [LICENSE](LICENSE). 4. The project's default copyright and header have been included in any new source files. 5. All (major) changes to deid must be documented in - [docs](docs). If your PR changes a core functionality, please - include clear description of the changes in your PR so that the docs + [docs](docs). If your PR changes a core functionality, please + include clear description of the changes in your PR so that the docs can be updated, or better, submit another PR to update the docs directly. 6. If necessary, update the [README](README.md), and the [CHANGELOG](CHANGELOG.md). 7. The pull request will be reviewed by others, and the final merge must be @@ -97,7 +97,7 @@ an incident. Further details of specific enforcement policies may be posted separately. 
Project maintainers, contributors and users who do not follow or enforce the -Code of Conduct in good faith may face temporary or permanent repercussions +Code of Conduct in good faith may face temporary or permanent repercussions with their involvement in the project as determined by the project's leader(s). ## Attribution diff --git a/deid/config/utils.py b/deid/config/utils.py index 5d540039..e17679fe 100644 --- a/deid/config/utils.py +++ b/deid/config/utils.py @@ -135,7 +135,7 @@ def load_deid(path=None): parts = line.split(" ") if len(parts) > 1: section_name = " ".join(parts[1:]) - section = re.sub("[%]|(\s+)", "", parts[0]).lower() + section = re.sub("[%]|(\s+)", "", parts[0]).lower() # noqa if section not in sections: bot.exit("%s is not a valid section." % section) @@ -225,7 +225,7 @@ def parse_format(line): ========== line: the line that starts with format. """ - fmt = re.sub("FORMAT|(\s+)", "", line).lower() + fmt = re.sub("FORMAT|(\s+)", "", line).lower() # noqa if fmt not in formats: bot.exit("%s is not a valid format." 
% fmt) bot.debug("FORMAT set to %s" % fmt) @@ -386,8 +386,8 @@ def parse_member(members, operator=None): member = members.pop(0).strip() # Find the first || or + - match_or = re.search("\|\|", member) - match_and = re.search("\+", member) + match_or = re.search("\|\|", member) # noqa + match_and = re.search("\+", member) # noqa if match_or is not None: operator = "||" diff --git a/deid/data/__init__.py b/deid/data/__init__.py index 50f8d520..667650d2 100644 --- a/deid/data/__init__.py +++ b/deid/data/__init__.py @@ -2,12 +2,6 @@ __copyright__ = "Copyright 2016-2022, Vanessa Sochat" __license__ = "MIT" -""" -Simple loading functions for datasets - - from deid.data import get_dataset -""" - import os data_base = os.path.abspath(os.path.dirname(__file__)) @@ -23,7 +17,7 @@ def get_dataset(dataset=None): """ try: from deid_data import data - except: + except ImportError: raise ValueError("install deid data with `pip install deid-data`") return data.get_dataset(dataset) diff --git a/deid/data/deid.dicom b/deid/data/deid.dicom index 21fb7873..408f8132 100644 --- a/deid/data/deid.dicom +++ b/deid/data/deid.dicom @@ -213,7 +213,7 @@ LABEL Logiq US LOGIQE # (AMBR) LABEL Philips IU22 # (CTP) contains Modality US + contains Manufacturer Philips - + equals Rows 480 + + equals Rows 480 + contains ManufacturerModelName iU22 ctpcoordinates 0,0,640,47 @@ -285,7 +285,7 @@ LABEL EPIQ_7G (CTP) ctpcoordinates 0,0,800,59 LABEL Z_ONE # (CTP) - contains Modality US + contains Modality US + contains Manufacturer Zonare + equals Rows 600 + contains ManufacturerModelName Z_ONE @@ -316,7 +316,7 @@ LABEL ATL HDI4000 # (FFUR) LABEL Siemens SC2000 # (CTP) contains Modality US - + contains Manufacturer Siemens + + contains Manufacturer Siemens + equals Rows 768 + equals Columns 1024 + contains ManufacturerModelName SC2000 @@ -325,7 +325,7 @@ LABEL Siemens SC2000 # (CTP) LABEL Siemens Antares # (CTP) contains Modality US - + contains Manufacturer Siemens + + contains Manufacturer Siemens + equals 
Rows 768 + equals Columns 1024 + contains ManufacturerModelName Antares diff --git a/deid/data/deid.dicom.xray.chest b/deid/data/deid.dicom.xray.chest index 95977ebf..deccb298 100644 --- a/deid/data/deid.dicom.xray.chest +++ b/deid/data/deid.dicom.xray.chest @@ -2,7 +2,7 @@ FORMAT dicom %filter whitelist -LABEL Matt Lungren CHEST +LABEL Matt Lungren CHEST contains Modality CR|DX|PR || contains StudyDescription DX|PR + contains StudyDescription CHEST || contains BodyPartExamined CHEST diff --git a/deid/dicom/__init__.py b/deid/dicom/__init__.py index 0ffe5f69..3a0f6452 100644 --- a/deid/dicom/__init__.py +++ b/deid/dicom/__init__.py @@ -1,4 +1,4 @@ from .fields import extract_sequence from .header import get_identifiers, remove_private_identifiers, replace_identifiers -from .pixels import DicomCleaner, has_burned_pixels, clean_pixel_data +from .pixels import DicomCleaner, clean_pixel_data, has_burned_pixels from .utils import get_files diff --git a/deid/dicom/config.json b/deid/dicom/config.json index a15274c7..aef7c78b 100644 --- a/deid/dicom/config.json +++ b/deid/dicom/config.json @@ -1,6 +1,6 @@ { - "get": { - + "get": { + "skip": ["PixelData", "RedPaletteColorLookupTableData", "GreenPaletteColorLookupTableData", @@ -20,7 +20,7 @@ "put":{ - "actions":[ + "actions":[ {"action":"KEEP","field":"PixelData"} ] } diff --git a/deid/dicom/groups.py b/deid/dicom/groups.py index ae766b51..a60a7490 100644 --- a/deid/dicom/groups.py +++ b/deid/dicom/groups.py @@ -2,10 +2,6 @@ __copyright__ = "Copyright 2016-2022, Vanessa Sochat" __license__ = "MIT" -""" -groups: functions to derive groups of fields or values -""" - from pydicom.multival import MultiValue diff --git a/deid/dicom/header.py b/deid/dicom/header.py index e97b13e7..03b8a30f 100644 --- a/deid/dicom/header.py +++ b/deid/dicom/header.py @@ -2,10 +2,6 @@ __copyright__ = "Copyright 2016-2022, Vanessa Sochat" __license__ = "MIT" -""" -header.py: functions to extract identifiers from dicom headers -""" - import os diff 
--git a/deid/dicom/pixels/clean.py b/deid/dicom/pixels/clean.py index 195b63cc..094af8c3 100644 --- a/deid/dicom/pixels/clean.py +++ b/deid/dicom/pixels/clean.py @@ -2,9 +2,6 @@ __copyright__ = "Copyright 2016-2022, Vanessa Sochat" __license__ = "MIT" -""" -clean.py: functions for pixel scrubbing -""" import math import os @@ -26,7 +23,7 @@ matplotlib.use("pdf") -from matplotlib import pyplot as plt +from matplotlib import pyplot as plt # noqa bot.level = 3 diff --git a/deid/dicom/pixels/detect.py b/deid/dicom/pixels/detect.py index 8f02779a..36d5c114 100644 --- a/deid/dicom/pixels/detect.py +++ b/deid/dicom/pixels/detect.py @@ -3,13 +3,9 @@ __license__ = "MIT" -""" -detect.py: functions for pixel scrubbing -""" +from typing import List, Optional, Union -from typing import Union, List, Optional - -from pydicom import read_file, FileDataset +from pydicom import FileDataset, read_file from pydicom.sequence import Sequence from deid.config import DeidRecipe diff --git a/deid/dicom/tags.py b/deid/dicom/tags.py index 5417a18c..44bf559d 100644 --- a/deid/dicom/tags.py +++ b/deid/dicom/tags.py @@ -109,7 +109,7 @@ def remove_sequences(dicom): dicom: the loaded dicom to remove sequences """ for elem in dicom.iterall(): - if isinstance(elem.value, Sequence) and dicom.get(elem.tag) != None: + if isinstance(elem.value, Sequence) and dicom.get(elem.tag) is not None: del dicom[elem.tag] return dicom diff --git a/deid/dicom/utils.py b/deid/dicom/utils.py index 04a298bd..b11436fe 100644 --- a/deid/dicom/utils.py +++ b/deid/dicom/utils.py @@ -11,8 +11,8 @@ from deid.logger import bot from deid.utils import recursive_find -from .validate import validate_dicoms +from .validate import validate_dicoms ################################################################################ # Functions for Dicom files diff --git a/deid/logger/message.py b/deid/logger/message.py index 33c2c313..cca7f8cb 100644 --- a/deid/logger/message.py +++ b/deid/logger/message.py @@ -2,9 +2,6 @@ __copyright__ 
= "Copyright 2016-2022, Vanessa Sochat" __license__ = "MIT" -""" -logger/message.py: Python logger base -""" import os import sys diff --git a/deid/main/__init__.py b/deid/main/__init__.py index 33ab9ecc..a285f7d6 100644 --- a/deid/main/__init__.py +++ b/deid/main/__init__.py @@ -77,7 +77,7 @@ def get_parser(): dest="command", ) - version = subparsers.add_parser( + subparsers.add_parser( "version", help="print version and exit" # pylint: disable=unused-variable ) diff --git a/deid/tests/Xtest_dicom_header.py b/deid/tests/Xtest_dicom_header.py index 3108d6d6..2012f0a5 100644 --- a/deid/tests/Xtest_dicom_header.py +++ b/deid/tests/Xtest_dicom_header.py @@ -4,9 +4,6 @@ __copyright__ = "Copyright 2016-2022, Vanessa Sochat" __license__ = "MIT" -""" -Test dicom header -""" import os import shutil diff --git a/deid/tests/resources/filter_multiple_first_filter_match.dicom b/deid/tests/resources/filter_multiple_first_filter_match.dicom index 9aeef3e5..ea051255 100644 --- a/deid/tests/resources/filter_multiple_first_filter_match.dicom +++ b/deid/tests/resources/filter_multiple_first_filter_match.dicom @@ -13,4 +13,4 @@ LABEL - To be tested with Cat.dcm. Intended to NOT flag the image. + contains Manufacturer Agfa %header -ADD PatientIdentityRemoved No \ No newline at end of file +ADD PatientIdentityRemoved No diff --git a/deid/tests/resources/filter_multiple_rule_innerop_false.dicom b/deid/tests/resources/filter_multiple_rule_innerop_false.dicom index be12f3ac..66889e3b 100644 --- a/deid/tests/resources/filter_multiple_rule_innerop_false.dicom +++ b/deid/tests/resources/filter_multiple_rule_innerop_false.dicom @@ -7,4 +7,4 @@ LABEL - To be tested with Cat.dcm. Intended to NOT flag the image. 
+ contains Manufacturer Agfa %header -ADD PatientIdentityRemoved No \ No newline at end of file +ADD PatientIdentityRemoved No diff --git a/deid/tests/resources/filter_multiple_rule_innerop_true.dicom b/deid/tests/resources/filter_multiple_rule_innerop_true.dicom index dd51617c..1a4cdbe3 100644 --- a/deid/tests/resources/filter_multiple_rule_innerop_true.dicom +++ b/deid/tests/resources/filter_multiple_rule_innerop_true.dicom @@ -7,4 +7,4 @@ LABEL - To be tested with Cat.dcm. Intended to flag the image. + contains Manufacturer Agfa %header -ADD PatientIdentityRemoved No \ No newline at end of file +ADD PatientIdentityRemoved No diff --git a/deid/tests/resources/filter_multiple_second_filter_match.dicom b/deid/tests/resources/filter_multiple_second_filter_match.dicom index 2818aba4..9ddd6c47 100644 --- a/deid/tests/resources/filter_multiple_second_filter_match.dicom +++ b/deid/tests/resources/filter_multiple_second_filter_match.dicom @@ -13,4 +13,4 @@ LABEL - To be tested with Cat.dcm. Intended to NOT flag the image. + contains Manufacturer Agfa %header -ADD PatientIdentityRemoved No \ No newline at end of file +ADD PatientIdentityRemoved No diff --git a/deid/tests/resources/filter_multiple_two_filter_match.dicom b/deid/tests/resources/filter_multiple_two_filter_match.dicom index 7fc0de47..abb3ccb3 100644 --- a/deid/tests/resources/filter_multiple_two_filter_match.dicom +++ b/deid/tests/resources/filter_multiple_two_filter_match.dicom @@ -11,4 +11,4 @@ LABEL - To be tested with Cat.dcm. Intended to flag the image. 
+ contains Manufacturer Agfa %header -ADD PatientIdentityRemoved No \ No newline at end of file +ADD PatientIdentityRemoved No diff --git a/deid/tests/resources/filter_multiple_zero_filter_match.dicom b/deid/tests/resources/filter_multiple_zero_filter_match.dicom index 78ca5a6e..ecc30c00 100644 --- a/deid/tests/resources/filter_multiple_zero_filter_match.dicom +++ b/deid/tests/resources/filter_multiple_zero_filter_match.dicom @@ -13,4 +13,4 @@ LABEL - To be tested with Cat.dcm. Intended to flag the image. + contains Manufacturer Agfa %header -ADD PatientIdentityRemoved No \ No newline at end of file +ADD PatientIdentityRemoved No diff --git a/deid/tests/resources/filter_single_rule_false.dicom b/deid/tests/resources/filter_single_rule_false.dicom index 1dba80f2..d6a3c4e4 100644 --- a/deid/tests/resources/filter_single_rule_false.dicom +++ b/deid/tests/resources/filter_single_rule_false.dicom @@ -6,4 +6,4 @@ LABEL - To be tested with Cat.dcm. Intended to NOT flag the image. contains Modality CT %header -ADD PatientIdentityRemoved No \ No newline at end of file +ADD PatientIdentityRemoved No diff --git a/deid/tests/resources/filter_single_rule_innerop_false.dicom b/deid/tests/resources/filter_single_rule_innerop_false.dicom index ea62f84e..f7bea9f0 100644 --- a/deid/tests/resources/filter_single_rule_innerop_false.dicom +++ b/deid/tests/resources/filter_single_rule_innerop_false.dicom @@ -6,4 +6,4 @@ LABEL - To be tested with Cat.dcm. Intended to NOT flag the image. contains Modality CT + contains PatientSex F %header -ADD PatientIdentityRemoved No \ No newline at end of file +ADD PatientIdentityRemoved No diff --git a/deid/tests/resources/filter_single_rule_innerop_true.dicom b/deid/tests/resources/filter_single_rule_innerop_true.dicom index 34265c1c..3406ad88 100644 --- a/deid/tests/resources/filter_single_rule_innerop_true.dicom +++ b/deid/tests/resources/filter_single_rule_innerop_true.dicom @@ -6,4 +6,4 @@ LABEL - To be tested with Cat.dcm. 
Intended to flag the image. contains Modality DX + contains PatientSex M %header -ADD PatientIdentityRemoved No \ No newline at end of file +ADD PatientIdentityRemoved No diff --git a/deid/tests/resources/filter_single_rule_true.dicom b/deid/tests/resources/filter_single_rule_true.dicom index 9f3c096b..c10f29c3 100644 --- a/deid/tests/resources/filter_single_rule_true.dicom +++ b/deid/tests/resources/filter_single_rule_true.dicom @@ -6,4 +6,4 @@ LABEL - To be tested with Cat.dcm. Intended to flag the image. contains Modality DX %header -ADD PatientIdentityRemoved No \ No newline at end of file +ADD PatientIdentityRemoved No diff --git a/deid/tests/resources/keepcoordinates.dicom b/deid/tests/resources/keepcoordinates.dicom index 65517621..60234f84 100644 --- a/deid/tests/resources/keepcoordinates.dicom +++ b/deid/tests/resources/keepcoordinates.dicom @@ -8,4 +8,4 @@ LABEL by SOPClassUID keepcoordinates 0,0,1024,1024 %header -ADD PatientIdentityRemoved No \ No newline at end of file +ADD PatientIdentityRemoved No diff --git a/deid/tests/resources/keepcoordinates_from.dicom b/deid/tests/resources/keepcoordinates_from.dicom index 0d6bae3b..14d8a6d4 100644 --- a/deid/tests/resources/keepcoordinates_from.dicom +++ b/deid/tests/resources/keepcoordinates_from.dicom @@ -5,7 +5,7 @@ LABEL Clean Ultrasound Regions present SequenceOfUltrasoundRegions coordinates all keepcoordinates from:SequenceOfUltrasoundRegions - + %header -ADD PatientIdentityRemoved No \ No newline at end of file +ADD PatientIdentityRemoved No diff --git a/deid/tests/resources/keepcoordinates_noaction.dicom b/deid/tests/resources/keepcoordinates_noaction.dicom index 9a2ea4af..d6a52a6b 100644 --- a/deid/tests/resources/keepcoordinates_noaction.dicom +++ b/deid/tests/resources/keepcoordinates_noaction.dicom @@ -7,4 +7,4 @@ LABEL by SOPClassUID keepcoordinates 0,0,1024,1024 %header -ADD PatientIdentityRemoved No \ No newline at end of file +ADD PatientIdentityRemoved No diff --git 
a/deid/tests/resources/remove_all.dicom b/deid/tests/resources/remove_all.dicom index 6d6b1375..766d15d6 100644 --- a/deid/tests/resources/remove_all.dicom +++ b/deid/tests/resources/remove_all.dicom @@ -7,4 +7,4 @@ LABEL by SOPClassUID coordinates all %header -ADD PatientIdentityRemoved No \ No newline at end of file +ADD PatientIdentityRemoved No diff --git a/deid/tests/resources/remove_coordinates.dicom b/deid/tests/resources/remove_coordinates.dicom index 96265a44..7038b8bb 100644 --- a/deid/tests/resources/remove_coordinates.dicom +++ b/deid/tests/resources/remove_coordinates.dicom @@ -7,4 +7,4 @@ LABEL by SOPClassUID coordinates 0,0,1024,1024 %header -ADD PatientIdentityRemoved No \ No newline at end of file +ADD PatientIdentityRemoved No diff --git a/deid/tests/resources/remove_coordinates_multiple.dicom b/deid/tests/resources/remove_coordinates_multiple.dicom index fa1a88e1..59a724b4 100644 --- a/deid/tests/resources/remove_coordinates_multiple.dicom +++ b/deid/tests/resources/remove_coordinates_multiple.dicom @@ -8,4 +8,4 @@ LABEL by SOPClassUID coordinates 10,10,20,20 %header -ADD PatientIdentityRemoved No \ No newline at end of file +ADD PatientIdentityRemoved No diff --git a/deid/tests/resources/remove_coordinates_multiple_filters.dicom b/deid/tests/resources/remove_coordinates_multiple_filters.dicom index 0e438b7a..8a626e90 100644 --- a/deid/tests/resources/remove_coordinates_multiple_filters.dicom +++ b/deid/tests/resources/remove_coordinates_multiple_filters.dicom @@ -13,4 +13,4 @@ LABEL by SOPClassUID blacklist2 coordinates 10,10,20,20 %header -ADD PatientIdentityRemoved No \ No newline at end of file +ADD PatientIdentityRemoved No diff --git a/deid/tests/resources/remove_coordinates_us.dicom b/deid/tests/resources/remove_coordinates_us.dicom index 7b6b35bd..ad98b83e 100644 --- a/deid/tests/resources/remove_coordinates_us.dicom +++ b/deid/tests/resources/remove_coordinates_us.dicom @@ -6,4 +6,4 @@ LABEL by Modality coordinates 0,0,500,500 %header 
-ADD PatientIdentityRemoved No \ No newline at end of file +ADD PatientIdentityRemoved No diff --git a/deid/tests/resources/remove_coordinates_us_all.dicom b/deid/tests/resources/remove_coordinates_us_all.dicom index 64d2712e..0c18db7a 100644 --- a/deid/tests/resources/remove_coordinates_us_all.dicom +++ b/deid/tests/resources/remove_coordinates_us_all.dicom @@ -6,4 +6,4 @@ LABEL by Modality coordinates all %header -ADD PatientIdentityRemoved No \ No newline at end of file +ADD PatientIdentityRemoved No diff --git a/deid/tests/test_clean.py b/deid/tests/test_clean.py index 3ad3b49e..8f403451 100644 --- a/deid/tests/test_clean.py +++ b/deid/tests/test_clean.py @@ -5,10 +5,6 @@ __license__ = "MIT" -""" -Test DICOM Cleaner -""" - import os import shutil import tempfile diff --git a/deid/tests/test_clean_pixel_dimensions.py b/deid/tests/test_clean_pixel_dimensions.py index 70875c75..72083c49 100644 --- a/deid/tests/test_clean_pixel_dimensions.py +++ b/deid/tests/test_clean_pixel_dimensions.py @@ -4,9 +4,6 @@ __copyright__ = "Copyright 2016-2022, Vanessa Sochat" __license__ = "MIT" -""" -Test DICOM Cleaner - Images with varying pixel dimensions -""" import os import shutil diff --git a/deid/tests/test_data.py b/deid/tests/test_data.py index 4ef9314d..cccf1b3f 100644 --- a/deid/tests/test_data.py +++ b/deid/tests/test_data.py @@ -4,10 +4,6 @@ __copyright__ = "Copyright 2016-2022, Vanessa Sochat" __license__ = "MIT" -""" -Test data functions -""" - import os import unittest diff --git a/deid/tests/test_deid_recipe.py b/deid/tests/test_deid_recipe.py index 6d5de94a..4aef5796 100644 --- a/deid/tests/test_deid_recipe.py +++ b/deid/tests/test_deid_recipe.py @@ -4,9 +4,6 @@ __copyright__ = "Copyright 2016-2022, Vanessa Sochat" __license__ = "MIT" -""" -Test DeidRecipe class -""" import os import shutil diff --git a/deid/tests/test_dicom_fields.py b/deid/tests/test_dicom_fields.py index e1d747e4..ea117bd8 100644 --- a/deid/tests/test_dicom_fields.py +++ 
b/deid/tests/test_dicom_fields.py @@ -4,10 +4,6 @@ __copyright__ = "Copyright 2016-2022, Vanessa Sochat" __license__ = "MIT" -""" -Testing field parsing and expansion -""" - import os import shutil import tempfile diff --git a/deid/tests/test_dicom_funcs.py b/deid/tests/test_dicom_funcs.py index 938b9b96..4145e7a4 100644 --- a/deid/tests/test_dicom_funcs.py +++ b/deid/tests/test_dicom_funcs.py @@ -4,9 +4,6 @@ __copyright__ = "Copyright 2016-2022, Vanessa Sochat" __license__ = "MIT" -""" -Testing deid provided functions -""" import re import shutil diff --git a/deid/tests/test_dicom_groups.py b/deid/tests/test_dicom_groups.py index e0520350..6d500877 100644 --- a/deid/tests/test_dicom_groups.py +++ b/deid/tests/test_dicom_groups.py @@ -4,9 +4,6 @@ __copyright__ = "Copyright 2016-2022, Vanessa Sochat" __license__ = "MIT" -""" -Testing groups for a deid recipe (values and fields) -""" import os import shutil diff --git a/deid/tests/test_dicom_utils.py b/deid/tests/test_dicom_utils.py index 2ac66a78..1d9c883c 100644 --- a/deid/tests/test_dicom_utils.py +++ b/deid/tests/test_dicom_utils.py @@ -4,9 +4,6 @@ __copyright__ = "Copyright 2016-2022, Vanessa Sochat" __license__ = "MIT" -""" -Test dicom utils -""" import os import shutil diff --git a/deid/tests/test_file_meta.py b/deid/tests/test_file_meta.py index 24f77dcd..078668f8 100644 --- a/deid/tests/test_file_meta.py +++ b/deid/tests/test_file_meta.py @@ -4,10 +4,6 @@ __copyright__ = "Copyright 2016-2022, Vanessa Sochat" __license__ = "MIT" -""" -Test file meta -""" - import unittest from deid.data import get_dataset diff --git a/deid/tests/test_filter_detect.py b/deid/tests/test_filter_detect.py index 1fab21a8..f1172497 100644 --- a/deid/tests/test_filter_detect.py +++ b/deid/tests/test_filter_detect.py @@ -3,10 +3,6 @@ __copyright__ = "Copyright 2016-2022, Vanessa Sochat" __license__ = "MIT" -""" -Test Filter Detection -""" - import os import shutil import tempfile diff --git a/deid/tests/test_replace_identifiers.py 
b/deid/tests/test_replace_identifiers.py index 8c6fc48f..c1ff4339 100644 --- a/deid/tests/test_replace_identifiers.py +++ b/deid/tests/test_replace_identifiers.py @@ -4,10 +4,6 @@ __copyright__ = "Copyright 2016-2022, Vanessa Sochat" __license__ = "MIT" -""" -Test replace_identifiers -""" - import os import shutil import tempfile @@ -770,7 +766,7 @@ def test_remove_all_blank_field_compounding_should_remove(self): self.assertEqual("Yes", parser.dicom["PatientIdentityRemoved"].value) self.assertIsNotNone(parser.dicom["PixelData"]) with self.assertRaises(KeyError): - check3 = parser.dicom["StudyDate"].value + parser.dicom["StudyDate"].value def test_blank_field_keep_field_compounding_should_keep(self): """ diff --git a/deid/tests/test_utils.py b/deid/tests/test_utils.py index 1c0936a7..12c0d641 100644 --- a/deid/tests/test_utils.py +++ b/deid/tests/test_utils.py @@ -4,10 +4,6 @@ __copyright__ = "Copyright 2016-2022, Vanessa Sochat" __license__ = "MIT" -""" -Test utils -""" - import json import os import shutil diff --git a/deid/tests/test_utils_files.py b/deid/tests/test_utils_files.py index 590a3985..ad58e2cc 100644 --- a/deid/tests/test_utils_files.py +++ b/deid/tests/test_utils_files.py @@ -4,10 +4,6 @@ __copyright__ = "Copyright 2016-2022, Vanessa Sochat" __license__ = "MIT" -""" -Test files operations -""" - import os import shutil import tempfile diff --git a/deid/utils/actions.py b/deid/utils/actions.py index 5a299089..ca5b26d9 100644 --- a/deid/utils/actions.py +++ b/deid/utils/actions.py @@ -41,7 +41,7 @@ def parse_value(dicom, value, item=None, field=None, funcs=None): # There can be additional key=value pairs try: value_option, extras = value_option.split(" ", 1) - except: + except Exception: extras = "" pass diff --git a/deid/version.py b/deid/version.py index 8cdace34..d4af9e02 100644 --- a/deid/version.py +++ b/deid/version.py @@ -2,7 +2,7 @@ __copyright__ = "Copyright 2016-2022, Vanessa Sochat" __license__ = "MIT" -__version__ = "0.3.0" +__version__ = 
"0.3.1" AUTHOR = "Vanessa Sochat" AUTHOR_EMAIL = "vsoch@users.noreply.github.com" NAME = "deid" diff --git a/docs/README.md b/docs/README.md index 9cdf3ce4..e6c4ce06 100644 --- a/docs/README.md +++ b/docs/README.md @@ -2,7 +2,7 @@ ![assets/img/logo.png](assets/img/logo.png) -This is a documentation site for [deid](https://www.github.com/pydicom/deid). +This is a documentation site for [deid](https://www.github.com/pydicom/deid). It is part of the [pydicom](https://www.github.com/pydicom) family of tools. ## Setup diff --git a/docs/_data/links.yml b/docs/_data/links.yml index 0854ed01..b1aff6f6 100644 --- a/docs/_data/links.yml +++ b/docs/_data/links.yml @@ -14,8 +14,7 @@ navigation: url: development/ - name: Contributing url: contributing/ - + external_navigation: - name: API Documentation url: https://deid.readthedocs.io/en/latest/ - diff --git a/docs/_docs/contributing/code.md b/docs/_docs/contributing/code.md index 2918800c..4a9fad69 100644 --- a/docs/_docs/contributing/code.md +++ b/docs/_docs/contributing/code.md @@ -30,7 +30,7 @@ Generally, a custom function should accept the following variables: - item: expected to be the dictionary lookup of user provided values - field: the dicom field - value: the value to replace - + You can generally define a catch all `**kwargs` if you don't need a field. Finally, if you do provide a custom variable, you'll need to also provide a default (or exit on error if it's absolutely essential). As an example, if your custom function in the lookup is named diff --git a/docs/_docs/contributing/docs.md b/docs/_docs/contributing/docs.md index 7ceaa6ed..ea73d105 100644 --- a/docs/_docs/contributing/docs.md +++ b/docs/_docs/contributing/docs.md @@ -13,8 +13,8 @@ can easily contribute via a [pull request](https://help.github.com/articles/abou ### Installing Dependencies -Initially (on OS X), you will need to setup [Brew](http://brew.sh/) which is a -package manager for OS X and [Git](https://git-scm.com/). 
To install Brew and Git, +Initially (on OS X), you will need to setup [Brew](http://brew.sh/) which is a +package manager for OS X and [Git](https://git-scm.com/). To install Brew and Git, run the following commands: ```bash diff --git a/docs/_docs/development/image-format.md b/docs/_docs/development/image-format.md index afb290b2..e5246c2d 100644 --- a/docs/_docs/development/image-format.md +++ b/docs/_docs/development/image-format.md @@ -17,13 +17,13 @@ deid This folder, and others like it, should contain should contain the following files: - - **config.json** this is the default specification for how a dicom header is parsed, which primarily means additions, and a set of custom actions. + - **config.json** this is the default specification for how a dicom header is parsed, which primarily means additions, and a set of custom actions. - **__init__.py**: has the purpose of exposing module functions to the higher up folder for import. For example, the function `get_identifiers` in [header.py](header.py) is programmatically accessible via `from deid.dicom import get_identifiers` thanks to this file. If you create a new module with the equivalent functions, you should be fine to just copy this file, or import the functions directly from tasks.py in the module folder. - **header.py**: should contain functions for `get_identifiers`, which should return a dictionary with top level indexes by entity, and the value of each entity another dictionary indexed by the item ids. This data structure, if provided by the client, must be understood by the function `remove_identifiers`. 
-Note that, since we are working in Python, we will be using dicom headers -that are mapped from the standard to pydicom, the entire mapping which is -provided [here](https://github.com/pydicom/pydicom/blob/master/pydicom/_dicom_dict.py), +Note that, since we are working in Python, we will be using dicom headers +that are mapped from the standard to pydicom, the entire mapping which is +provided [here](https://github.com/pydicom/pydicom/blob/master/pydicom/_dicom_dict.py), and programmatically accessible via: ```python @@ -36,14 +36,14 @@ for key,entry in DicomDictionary.items(): field_names.append(entry[4]) ``` -Since there are so many, we enforce (at least for dicom) the most conservative -approach of removing header fields that the client has not asked anything special +Since there are so many, we enforce (at least for dicom) the most conservative +approach of removing header fields that the client has not asked anything special to be done for. Let's now talk about the [config.json](config.json). ## Config.json -The base of the json has two classes, and they correspond with the actions of -`get` and `put`, where a "get" is broadly the step of getting identifiers from +The base of the json has two classes, and they correspond with the actions of +`get` and `put`, where a "get" is broadly the step of getting identifiers from the data, and the "put" is putting things back (and realistically, removing a lot). 
Here they are, completely empty: @@ -58,9 +58,9 @@ The entire data structure isn't very large, and can be shown to you: ```python { - "get": { + "get": { + - "skip": ["PixelData"], "ids":{ "entity":"PatientID", @@ -72,7 +72,7 @@ The entire data structure isn't very large, and can be shown to you: "put":{ "actions":[ - + {"action":"ADD","field":"PatientIdentityRemoved","value": "Yes"}, ] @@ -80,24 +80,24 @@ The entire data structure isn't very large, and can be shown to you: } ``` -Note that we don't need to specify the datatypes like "PixelData" or "Columns", -or other fields related to the data. These fields are by default kept, as they +Note that we don't need to specify the datatypes like "PixelData" or "Columns", +or other fields related to the data. These fields are by default kept, as they are specific to the pixel data. For details see [this issue](https://github.com/pydicom/pydicom/issues/372). ### Get -If you read the details about get (usage for the client) see [get]({{ site.baseurl }}/getting-started/dicom-get/), -you probably see some commonality. We have identified default fields in the header -for entity and item under `['get']['ids']` (both which can be altered by the user via -a function call) and then we skip over PixelData, because we don't want to return that -for inspection, or have it in the list to include. If there are others you don't -want returned, then add them to the skip list. Have caution that the user won't see -the field returned, and likely won't ask for any action to be taken, meaning it will +If you read the details about get (usage for the client) see [get]({{ site.baseurl }}/getting-started/dicom-get/), +you probably see some commonality. We have identified default fields in the header +for entity and item under `['get']['ids']` (both which can be altered by the user via +a function call) and then we skip over PixelData, because we don't want to return that +for inspection, or have it in the list to include. 
If there are others you don't +want returned, then add them to the skip list. Have caution that the user won't see +the field returned, and likely won't ask for any action to be taken, meaning it will by default be blanked. ### Put -Put is primarily concerned with actions, which as they are for the user, can be -`ADD`, `KEEP`, `REMOVE`, or `BLANK`. For the default, we keep the useful pixel data, +Put is primarily concerned with actions, which as they are for the user, can be +`ADD`, `KEEP`, `REMOVE`, or `BLANK`. For the default, we keep the useful pixel data, and specify that we have removed the patient identity. diff --git a/docs/_docs/development/index.md b/docs/_docs/development/index.md index 56c33c54..341cae04 100644 --- a/docs/_docs/development/index.md +++ b/docs/_docs/development/index.md @@ -5,10 +5,11 @@ category: Development order: 1 --- -This readme is intended to explain how the functions work (on the back end) for those -wishing to create a module for a new image type. The basic idea is that each folder -(module, eg `dicom`) contains a base processing template that tells the functions to -`get_identifiers` how to process different header values for the datatype -(e.g, DICOM). +This readme is intended to explain how the functions work (on the back end) for those +wishing to create a module for a new image type. The basic idea is that each folder +(module, eg `dicom`) contains a base processing template that tells the functions to +`get_identifiers` how to process different header values for the datatype +(e.g, DICOM). 
- [Add an Image Format]({{ site.baseurl }}/development/add-format/) + - [Linting and Formatting]({{ site.baseurl }}/development/linting-format/) diff --git a/docs/_docs/development/linting-format.md b/docs/_docs/development/linting-format.md new file mode 100644 index 00000000..a3897336 --- /dev/null +++ b/docs/_docs/development/linting-format.md @@ -0,0 +1,28 @@ +--- +title: Linting and Formatting +category: Development +order: 3 +--- + +After installing deid to a local environment, you can use [pre-commit](https://pre-commit.com/) to help +with linting and formatting. To do that: + + +```bash +$ pip install -r .github/dev-requirements.txt +``` + +Then to run: + +```bash +$ pre-commit run --all-files +``` + +You can also install as a hook: + +```bash +$ pre-commit install +``` + +And it will run always before you commit. This is the same linting +we use in our testing as well. diff --git a/docs/_docs/examples/client.md b/docs/_docs/examples/client.md index 322f3124..7bd7a769 100644 --- a/docs/_docs/examples/client.md +++ b/docs/_docs/examples/client.md @@ -146,10 +146,10 @@ DEBUG Adding REPLACE PatientID var:id DEBUG Adding REPLACE SOPInstanceUID var:source_id ``` -and the file we are reading looks like this. It's very intuitive, we have groups of -filters (more specific at the top and moving down to more general) and each is named -("dangerouscookie" and "bigimage"). Within each filter we have one criteria group, -with a "+" indicating and. We could have more groups under each, but happen to not +and the file we are reading looks like this. It's very intuitive, we have groups of +filters (more specific at the top and moving down to more general) and each is named +("dangerouscookie" and "bigimage"). Within each filter we have one criteria group, +with a "+" indicating and. We could have more groups under each, but happen to not for this example. 
``` @@ -177,18 +177,18 @@ REPLACE PatientID var:id REPLACE SOPInstanceUID var:source_id ``` -We won't be using the header section for this example, but for your FYI, -this is the recipe for how we would want to replace information in the header, +We won't be using the header section for this example, but for your FYI, +this is the recipe for how we would want to replace information in the header, if we were cleaning the headers. Right now we are just filtering images to - flag those that might have PHI. Let's very strictly walk through the logic + flag those that might have PHI. Let's very strictly walk through the logic that will be taken above: 1. If the header contains field PatientSex "M" (Male), and OperatorsName is not "bold bread," we flag. Otherwise, keep going. 2. If the header has field Rows 2048 and Columns 1536 we flag. -The flag that is done first (more specific) is the final decision. -This means that you should have your known coordinates of PHI (eg, specific -modality, manufacturer, etc) first, and followed by more general estimates of +The flag that is done first (more specific) is the final decision. +This means that you should have your known coordinates of PHI (eg, specific +modality, manufacturer, etc) first, and followed by more general estimates of PHI. Likely a later group will create flags for more manual inspection. Now let's run the filter! First just within python: @@ -227,7 +227,7 @@ image1.dcm:fragrant pond - F image5.dcm:curly darkness - M <--- FLAGGED ``` -Seems to be! The data structure returned gives us programmatic access to the groups, +Seems to be! 
The data structure returned gives us programmatic access to the groups, including list of clean (top), list of flagged and flag list name (flagged) and given flagged, a lookup dictionary with reasons: ```python diff --git a/docs/_docs/examples/deid-dataset.md b/docs/_docs/examples/deid-dataset.md index fce58e32..81fc00b3 100644 --- a/docs/_docs/examples/deid-dataset.md +++ b/docs/_docs/examples/deid-dataset.md @@ -121,13 +121,13 @@ from Crypto.Hash import SHA512 from datetime import datetime class DeidDataset: - """This class allows to pseudonymize an instance of + """This class allows to pseudonymize an instance of pydicom.Dataset with our custom recipe and functions. """ def __init__(self, secret_salt: str, recipe_path: str): """New instance of our pseudonymizer class. - :param secret_salt: a random string that makes the + :param secret_salt: a random string that makes the hashing harder to break. :param recipe_path: path to our deid recipe. """ @@ -149,14 +149,14 @@ class DeidDataset: parser.parse(strip_sequences=True, remove_private=True) return parser.dicom - # All registered functions that are used in the recipe must + # All registered functions that are used in the recipe must # receive the arguments: `item`, `value`, `field`, `dicom` - + def deid_hash_func(self, item, value, field, dicom) -> str: """Performs self.hash to field.element.value""" val = field.element.value return self.hash(str(val)) - + @staticmethod def remove_day(item, value, field, dicom) -> str: """Removes the day from a DT field in the deid framework""" @@ -179,7 +179,7 @@ class DeidDataset: def hash(self, msg: str) -> str: """ - :param msg: message that we want to encrypt, + :param msg: message that we want to encrypt, normally the PatientID or the StudyID. 
:return: the encrypted message as hexdigest (in characters from '0' to '9' and 'a' to 'f') @@ -189,7 +189,7 @@ class DeidDataset: bytes_str = bytes(f"{self.secret_salt}{msg}", "utf-8") h.update(bytes_str) return str(h.hexdigest()) - + # Load the pydicom Dataset import json @@ -251,4 +251,4 @@ Dataset after pseudonymization (0012, 0062) Patient Identity Removed CS: 'Yes' (0012, 0063) De-identification Method LO: 'my_deid_recipe.dicom.v1.0' (0020, 0010) Study ID SH: 'ae4b477e5709d0c1f746e0adc9ab552fee100b91416f9f3a04037e999077e823' -``` \ No newline at end of file +``` diff --git a/docs/_docs/examples/func-replace.md b/docs/_docs/examples/func-replace.md index 883ad6e0..80cca6be 100644 --- a/docs/_docs/examples/func-replace.md +++ b/docs/_docs/examples/func-replace.md @@ -71,8 +71,8 @@ items = get_identifiers(dicom_files, expand_sequences=False) ``` -The function we will use for the example will perform an action to generate a uid, -but you can also use it to communicate with databases, APIs, or do something like +The function we will use for the example will perform an action to generate a uid, +but you can also use it to communicate with databases, APIs, or do something like save the original (and newly generated one) in some (IRB approvied) place @@ -189,8 +189,8 @@ def generate_uid(item, value, field, dicom): return prefix + "-" + sliced_uid ``` -As stated in the docstring, you can expect it to be passed the dictionary of -items extracted from the dicom (and your function) and variables, the +As stated in the docstring, you can expect it to be passed the dictionary of +items extracted from the dicom (and your function) and variables, the original value (func:generate_uid) and the field name you are applying it to. @@ -215,18 +215,18 @@ interactive session and have all the variables available to you for inspection. For example: ```python -item +item # {'(0008, 0005)': (0008, 0005) Specific Character Set CS: 'ISO_IR 100' [SpecificCharacterSet], # ... 
# 'generate_uid': } -value +value # 'func:generate_uid' -field +field # (0020, 000d) Study Instance UID UI: 1.2.276.0.7230010.3.1.2.8323329.5329.1495927169.580350 [StudyInstanceUID] -dicom +dicom # (0008, 0005) Specific Character Set CS: 'ISO_IR 100' ... ``` @@ -237,7 +237,7 @@ on how it is used internally, so you should always check. ## Update Your Items -How do we update the items? Remember, the action is: +How do we update the items? Remember, the action is: ``` REPLACE StudyInstanceUID func:generate_uid @@ -252,8 +252,8 @@ for item in items: ## Replace identifiers -We are ready to go! Now let's generate the cleaned files! It will output to a -temporary directory. +We are ready to go! Now let's generate the cleaned files! It will output to a +temporary directory. ```python cleaned_files = replace_identifiers(dicom_files=dicom_files, @@ -276,7 +276,7 @@ See [here](https://github.com/pydicom/deid/tree/master/examples/dicom/header-man to see if the replacement was done: ```python -cleaned_files[0] +cleaned_files[0] (0020, 000d) Study Instance UID UI: studyinstanceuid-1.2.826.0.1.3680043.10.188.1803528571851574950019323462792270863 (0020, 000e) Series Instance UID UI: seriesinstanceuid-1.2.826.0.1.3680043.10.188.1218768560803332968447018964651707696 (0020, 0052) Frame of Reference UID UI: frameofreferenceuid-1.2.826.0.1.3680043.10.188.3138524385829221974514732538424409758 diff --git a/docs/_docs/examples/func-sequence-replace.md b/docs/_docs/examples/func-sequence-replace.md index 13dc9dc6..322bda71 100644 --- a/docs/_docs/examples/func-sequence-replace.md +++ b/docs/_docs/examples/func-sequence-replace.md @@ -4,7 +4,7 @@ category: Examples order: 4 --- -The code and files for this example can be found [here](https://github.com/pydicom/deid/tree/master/examples/dicom/header-manipulation/func-sequence-replace/). 
+The code and files for this example can be found [here](https://github.com/pydicom/deid/tree/master/examples/dicom/header-manipulation/func-sequence-replace/). For this example, we want to replace values that are nested (in sequences). This operation is available for deid versions 0.1.34 and later, and currently we support `REPLACE`, and `BLANK`. @@ -27,7 +27,7 @@ dicom_files = ['MR.dcm'] items = get_identifiers(dicom_files) ``` -For each item (indexed by the dicom file name), sequences +For each item (indexed by the dicom file name), sequences are flattened out in the data structure. For example: ```python @@ -40,8 +40,8 @@ are flattened out in the data structure. For example: 'ReferencedPerformedProcedureStepSequence__ReferencedSOPInstanceUID': 'xxxxxxxx', ``` -The function we will use for the example will perform an action to generate a uid, -but you can also use it to communicate with databases, APIs, or do something like +The function we will use for the example will perform an action to generate a uid, +but you can also use it to communicate with databases, APIs, or do something like save the original (and newly generated one) in some (IRB approvied) place @@ -98,8 +98,8 @@ recipe.get_actions(field='InstanceCreationDate') recipe.get_actions(field='PatientID', action="REMOVE") ``` -Our recipe instance is ready to go. From the above we are saying we want to replace the -`InstanceCreationDate` field with the output from the generate_uid function, +Our recipe instance is ready to go. From the above we are saying we want to replace the +`InstanceCreationDate` field with the output from the generate_uid function, which is expected in the item dict. Let's write that next. 
@@ -150,8 +150,8 @@ def generate_uid(item, value, field, dicom): return prefix + "-" + sliced_uid ``` -As stated in the docstring, you can expect it to be passed the dictionary of -items extracted from the dicom (and your function) and variables, the +As stated in the docstring, you can expect it to be passed the dictionary of +items extracted from the dicom (and your function) and variables, the original value (func:generate_uid) and the field name you are applying it to. @@ -176,18 +176,18 @@ interactive session and have all the variables available to you for inspection. For example: ```python -item +item # {'(0008, 0005)': (0008, 0005) Specific Character Set CS: 'ISO_IR 100' [SpecificCharacterSet], # ... # 'generate_uid': } -value +value # 'func:generate_uid' -field +field # (0020, 000d) Study Instance UID UI: 1.2.276.0.7230010.3.1.2.8323329.5329.1495927169.580350 [StudyInstanceUID] -dicom +dicom # (0008, 0005) Specific Character Set CS: 'ISO_IR 100' ... ``` @@ -198,7 +198,7 @@ on how it is used internally, so you should always check. ## Update Your Items -How do we update the items? Remember, the action is: +How do we update the items? Remember, the action is: ``` REPLACE InstanceCreationDate func:generate_uid @@ -213,7 +213,7 @@ for item in items: ## Replace identifiers -We are ready to go! Now let's generate the cleaned files! It will output to a +We are ready to go! Now let's generate the cleaned files! It will output to a temporary directory. Since we want to replace nested sequences, we need to set `strip_sequences` to False. diff --git a/docs/_docs/examples/header-expanders.md b/docs/_docs/examples/header-expanders.md index 993692a1..26f2dc76 100644 --- a/docs/_docs/examples/header-expanders.md +++ b/docs/_docs/examples/header-expanders.md @@ -5,7 +5,7 @@ order: 5 --- This example will walk through how to use header expansion -to select more than one field from a dicom header to apply an action to. 
+to select more than one field from a dicom header to apply an action to. Thanks to [@howardpchen](https://github.com/howardpchen) for contributing this idea in [this issue](https://github.com/pydicom/deid/issues/87). We will first show examples that you can write into [a deid recipe](https://pydicom.github.io/deid/examples/recipe/) to keep a record of your dicom header edits. We will then show the same (and more advanced) actions working with expanders directly in Python. Let's go! Let's say I want to: @@ -62,7 +62,7 @@ REPLACE except:LoserField func:my_special_function ## Python Examples If you want to use the expanders in your code, that's easy too! Here -are the same examples. Let's first start with reading in a dicom file, +are the same examples. Let's first start with reading in a dicom file, such as one of the dicom-cookies examples provided with deid. @@ -87,7 +87,7 @@ Let's get those cookies! ```python base = get_dataset('dicom-cookies') -dicom_files = list(get_files(base)) +dicom_files = list(get_files(base)) ``` `dicom_files` is a list of the complete paths for 7 dicom cookie examples. @@ -111,8 +111,8 @@ from deid.dicom.fields import expand_field_expression ``` None of the actions (BLANK, JITTER, etc.) are relevant here; we just want to get back the list of -fields that meet some criteria. Given an action, these fields would be -passed on to the next step in deid to handle the action. +fields that meet some criteria. Given an action, these fields would be +passed on to the next step in deid to handle the action. You could also use this function to interactively explore the header data, or another purpose. @@ -263,7 +263,7 @@ def pusheenize(item, value, field, dicom): return value ``` -To not forget that we are showing examples with expand_field_expression, this quick +To not forget that we are showing examples with expand_field_expression, this quick snippet simple shows that the list of field names is the entire set included with the dicom. 
@@ -282,16 +282,16 @@ for item in items: items[item]['pusheenize'] = pusheenize ``` - + ### 3. Replace Identifiers given that our function is in the python working environment, we would have extracted identifiers like this. We don't want to save them -so we set save to False. If we set save to True, they would be saved to a temporary directory. +so we set save to False. If we set save to True, they would be saved to a temporary directory. ```python from deid.dicom import replace_identifiers cleaned_files = replace_identifiers(dicom_files=dicom_files, - deid=recipe, + deid=recipe, save=False, ids=items) ``` @@ -300,7 +300,7 @@ Let's look at the first cleaned dicom. Is it pusheenized? ```python In [78]: cleaned_files[0] -Out[78]: +Out[78]: (0008, 0005) Specific Character Set CS: 'ISO_IR 100' (0008, 0016) SOP Class UID UI: Secondary Capture Image Storage (0008, 0018) SOP Instance UID UI: 1.2.276.0.7230010.3.1.4.8323329.5323.1495927169.335276 diff --git a/docs/_docs/examples/recipe.md b/docs/_docs/examples/recipe.md index 7d3758c0..39916d56 100644 --- a/docs/_docs/examples/recipe.md +++ b/docs/_docs/examples/recipe.md @@ -4,20 +4,20 @@ category: Examples order: 2 --- -As we've discussed, the basic actions of using header filters to flag images, -and performing actions on headers (for replacement), are controlled by a text file called -a deid recipe. If you want a reminder about how to write this text file, -[read here]({{ site.baseurl }}/getting-started/dicom-config), and we hope to at some -point have an interactive way as well (let us know your feedback!). -The basic gist of the file is that we have different sections. +As we've discussed, the basic actions of using header filters to flag images, +and performing actions on headers (for replacement), are controlled by a text file called +a deid recipe. 
If you want a reminder about how to write this text file, +[read here]({{ site.baseurl }}/getting-started/dicom-config), and we hope to at some +point have an interactive way as well (let us know your feedback!). +The basic gist of the file is that we have different sections. - In the `%header` section we have a list of actions to take on header fields - We can define groups, either field names `%fields` or values from fields `%values` to reference in header actions - In the `%filter` section we have lists of criteria to check image headers against, and given a match, we flag the image as belonging to the group. -In this small tutorial, we will walk through the basic steps of loading a recipe, -interacting with it, and then using it to replace identifiers. If you want to -jump in, then go straight to the [script](https://github.com/pydicom/deid/blob/master/examples/dicom/recipe/deid-dicom-example.py) +In this small tutorial, we will walk through the basic steps of loading a recipe, +interacting with it, and then using it to replace identifiers. If you want to +jump in, then go straight to the [script](https://github.com/pydicom/deid/blob/master/examples/dicom/recipe/deid-dicom-example.py) that describes this example. @@ -38,8 +38,8 @@ The following sections will describe creating and combining recipes. ### Create a DeidRecipe -We will start with how to work with a `DeidRecipe` object. If you aren't interested -in this use case or just want to use a provided deid recipe file, continue to the +We will start with how to work with a `DeidRecipe` object. If you aren't interested +in this use case or just want to use a provided deid recipe file, continue to the next section. We start by importing the class, and instantiating it. @@ -51,14 +51,14 @@ WARNING No specification, loading default base deid.dicom ``` Since we didn't load a custom deid recipe text file, we get a default warning message that -a default is being use. 
That default is a [dicom base](https://github.com/pydicom/deid/blob/master/deid/data/deid.dicom) +a default is being use. That default is a [dicom base](https://github.com/pydicom/deid/blob/master/deid/data/deid.dicom) provided by the library. If you want to see the raw data structure that is loaded, look here: ``` recipe.deid ``` -You can also double check the recipe format. We currently only support dicom, +You can also double check the recipe format. We currently only support dicom, but this could in the future be other image formats (seriously, open an issue)! @@ -67,9 +67,9 @@ recipe.get_format() # dicom ``` -Note that validation of this structure happens at load time. If something is -incorrectly labeled or formatted, you will get an error message and it will -fail to load. You can also provide your own deid recipe file, and in +Note that validation of this structure happens at load time. If something is +incorrectly labeled or formatted, you will get an error message and it will +fail to load. You can also provide your own deid recipe file, and in doing so, you won't load the default. Here is one from our examples folder @@ -85,24 +85,24 @@ recipe = DeidRecipe(deid=deid_file) ``` I would strongly recommended starting with an example, and building your custom -recipe from it. If you have an example that you think others would find useful, +recipe from it. If you have an example that you think others would find useful, please contribute it to the repository in the examples folder. ### Combine Recipes -You can also choose to load the default base with your own recipe. In this action, -the two recipes are combined, with any conflict (an overlap in the second) being -given preference. For example, if the first deid you load removes a field and -the second adds the same field, the final result will have it added. -Keep this in mind and take care when combining recipes for this reason. +You can also choose to load the default base with your own recipe. 
In this action, +the two recipes are combined, with any conflict (an overlap in the second) being +given preference. For example, if the first deid you load removes a field and +the second adds the same field, the final result will have it added. +Keep this in mind and take care when combining recipes for this reason. Here is how it would look to load the default base *and* provide you custom file: ``` recipe = DeidRecipe(deid=deid_file, base=True) ``` -You can also specify a different base entirely, and this would be equivalent to +You can also specify a different base entirely, and this would be equivalent to just providing a list of deid files: ``` @@ -110,8 +110,8 @@ recipe = DeidRecipe(deid=[deid_file1, deid_file2]) recipe = DeidRecipe(deid=deid_file1, base=True, default_base=deid_file2) ``` -When we load bases, we are looking in the [data folder](https://github.com/pydicom/deid/tree/master/deid/data) -provided by the module. The base is the deid. in this folder. +When we load bases, we are looking in the [data folder](https://github.com/pydicom/deid/tree/master/deid/data) +provided by the module. The base is the deid. in this folder. So for example, if we wanted to use `deid/data/deid.dicom.chest.xray` we would specify: ``` @@ -125,8 +125,8 @@ recipe = DeidRecipe(deid='dicom.xray.chest') recipe = DeidRecipe(deid='dicom.xray.chest', base=True) ``` -This data folder is to encourage sharing! It often is a lot of work to develop -a criteria specific for your group or interest. If you have a general recipe +This data folder is to encourage sharing! It often is a lot of work to develop +a criteria specific for your group or interest. If you have a general recipe that others might use, please [contribute it](https://github.com/pydicom/deid/blob/master/CONTRIBUTING.md#pull-request-process). @@ -141,9 +141,9 @@ groups for lists of values or fields. 
The process of flagging images comes down to writing a set of filters to check if each image meets some criteria of interest. For example, I might create a filter called "xray" that is triggered when the Modality is CT or XR. -The filters are found in the `%filter` sections of the deid recipe. +The filters are found in the `%filter` sections of the deid recipe. -First, to get a complete dict of all filters (a dictionary with keys corresponding +First, to get a complete dict of all filters (a dictionary with keys corresponding to filter group names and values the filters themselves) we can do the following actions: ```python @@ -162,12 +162,12 @@ recipe.get_filters('blacklist') A header action is a step (e.g., replace, remove, blank) to be applied to a dicom image header. The headers are also part of the deid recipe. You don't need to necessarily use header actions and filters at the same time, but since -it's nice to keep things tidy for a single dataset using a shared file, we support -having them both represented in the same file. You could just as easily keep +it's nice to keep things tidy for a single dataset using a shared file, we support +having them both represented in the same file. You could just as easily keep them in separate files to load separately - a DeidRecipe is not required to have header actions and/or filters. -First, let's load the default deid recipe file (deid.dicom in the data folder) +First, let's load the default deid recipe file (deid.dicom in the data folder) that we know has a `%header` section. 
``` @@ -205,11 +205,11 @@ recipe.get_actions(field='PatientID', action="REMOVE") # [{'action': 'REMOVE', 'field': 'PatientID'}] # If you have lists of fields or values defined, you can retrieve them too -recipe.get_fields_lists() +recipe.get_fields_lists() # OrderedDict([('instance_fields', # [{'action': 'FIELD', 'field': 'contains:Instance'}])]) -recipe.get_values_lists() +recipe.get_values_lists() # OrderedDict([('cookie_names', # [{'action': 'SPLIT', # 'field': 'PatientID', @@ -217,7 +217,7 @@ recipe.get_values_lists() # ('operator_names', # [{'action': 'FIELD', 'field': 'startswith:Operator'}])]) -recipe.get_values_lists("cookie_names") +recipe.get_values_lists("cookie_names") # [{'action': 'SPLIT', 'field': 'PatientID', 'value': 'by="^";minlength=4'}] ``` @@ -282,12 +282,12 @@ The above says that we are going to: We have 7 dicom cookie images we loaded above, so we have two options. We can either loop through the dictionary of ids and update values (in this case, -adding values to be used as new variables) or we can make a new datastructure. +adding values to be used as new variables) or we can make a new datastructure. Let's be lazy and just update the extracted ones ```python updated_ids = dict(); count=0 -for image, fields in ids.items(): +for image, fields in ids.items(): fields['id'] = 'cookiemonster' fields['source_id'] = "cookiemonster-image-%s" %(count) updated_ids[image] = fields @@ -348,7 +348,7 @@ cleaned_files = replace_identifiers(dicom_files=dicom_files, ## Groups -More advanced usage of header actions would be to define a group of values (the content of the +More advanced usage of header actions would be to define a group of values (the content of the header fields) or field names (the names themselves) to use in an action. This corresponds to `%fields` (a list of fields) and `%values` (a list of values from fields) to parse at the onset of the dicom load, and use later in a recipe. 
Here is how that might look diff --git a/docs/_docs/getting-started/dicom-config.md b/docs/_docs/getting-started/dicom-config.md index a0357828..93462a00 100644 --- a/docs/_docs/getting-started/dicom-config.md +++ b/docs/_docs/getting-started/dicom-config.md @@ -36,7 +36,7 @@ Let's first discuss each of the sections. A section is a part of the recipe that starts with a "%". You can think of a section as a chunk of text that is parsed for some purpose. For example, `%filter` is a section where it's expected that you've defined filters, and -`%header` is expected to have actions to update and change headers. +`%header` is expected to have actions to update and change headers. | Section | Description | Example | @@ -65,7 +65,7 @@ action corresponds with either: - an action applied to a header field, like "REPLACE FieldA with value B" or - replacing pixels in the image with a black box to hide text and other identifiers -For reading more about how the Deid software does this by way of a file called +For reading more about how the Deid software does this by way of a file called a deid recipe, read about deid [recipe filters]({{ site.baseurl }}/user-docs/recipe-filters/). @@ -77,10 +77,10 @@ The general application flow to clean headers looks like this: [define actions] -> [get identifiers] --> [update identifiers] --> [replace identifiers] ``` -And then optionally save the updated files! +And then optionally save the updated files! More detail is provided about cleaning headers in the [recipe headers]({{ site.baseurl }}/user-docs/recipe-headers/) -pages. +pages. > Where do I go from here? 
diff --git a/docs/_docs/getting-started/dicom-get.md b/docs/_docs/getting-started/dicom-get.md index 6d3a572a..34510420 100644 --- a/docs/_docs/getting-started/dicom-get.md +++ b/docs/_docs/getting-started/dicom-get.md @@ -17,8 +17,8 @@ $ pip install deid-data ## Get Identifiers -A get request using the deid module will return a data structure with headers found in a particular dataset. -Let's walk through these steps. As we did in the [loading]({{ site.baseurl }}/getting-started/dicom-loading), +A get request using the deid module will return a data structure with headers found in a particular dataset. +Let's walk through these steps. As we did in the [loading]({{ site.baseurl }}/getting-started/dicom-loading), the first step was to load a dicom dataset: @@ -30,7 +30,7 @@ base = get_dataset("dicom-cookies") dicom_files = list(get_files(base)) ``` -We now have our small dataset that we want to de-identify! The first step is to get +We now have our small dataset that we want to de-identify! The first step is to get the identifiers. By default, we will return all of them. That call will look like this: ```python @@ -43,7 +43,7 @@ Within each entry, the value is another dictionary with an expanded string of the tag. For example: ``` -ids[dicom_files[0]] +ids[dicom_files[0]] {'(0008, 0005)': (0008, 0005) Specific Character Set CS: 'ISO_IR 100' [SpecificCharacterSet], '(0008, 0016)': (0008, 0016) SOP Class UID UI: Secondary Capture Image Storage [SOPClassUID], '(0008, 0018)': (0008, 0018) SOP Instance UID UI: 1.2.276.0.7230010.3.1.4.8323329.5329.1495927169.580351 [SOPInstanceUID], @@ -82,7 +82,7 @@ ids[dicom_files[0]] If there is a nested tag, you'll see it with the format `(7fe0, 0010)__(0080, 0012)`. If there is a nested sequence, you'll see the index provided in that same format. 
For example, -`(7fe0, 0010)__0__(0080, 0012)` counts as the first element of a sequence, +`(7fe0, 0010)__0__(0080, 0012)` counts as the first element of a sequence, and `(7fe0, 0010)__1__(0080, 0012)` the second. We start counting at 0, we aren't barbarians! @@ -95,19 +95,19 @@ parsing. For example: ```python field = ids[dicom_files[0]]['(0010, 0010)'] -field.element +field.element (0010, 0010) Patient's Name PN: 'falling disk' -field.name +field.name 'PatientName' -field.uid +field.uid '(0010, 0010)' ``` The field.element is what you would get if you indexed the dicom Dataset at dicom.get("PatientName"). The name refers to the keyword (which, if there -is nesting, will include that. For example, a Sequence with header value `AdditionalData` +is nesting, will include that. For example, a Sequence with header value `AdditionalData` and item `Modality` will be returned as `AdditionalData_Modality`, and this name string is used to help with filters. The uid would also include the index of the sequence, since we use it to index into the @@ -124,22 +124,22 @@ At this point, you have a few options: ### Recipe Interaction -If you want to write a recipe to perform a bunch of custom actions on your +If you want to write a recipe to perform a bunch of custom actions on your dicom files, you should read about how to [work with recipes]({{ site.basurl }}/examples/recipe/). ### Clean Pixels -It's likely that the pixels in the images have burned in annotations, and we can -use the header data to flag these images. Thus, before you replace identifiers, -you probably want to do this. We have a DicomCleaner class that can flag images -for PHI based on matching some header filter criteria, and you can -[read about that here]({{site.baseurl}}/getting-started/dicom-pixels/). +It's likely that the pixels in the images have burned in annotations, and we can +use the header data to flag these images. Thus, before you replace identifiers, +you probably want to do this. 
We have a DicomCleaner class that can flag images +for PHI based on matching some header filter criteria, and you can +[read about that here]({{site.baseurl}}/getting-started/dicom-pixels/). ### Update Identifiers Once you are finished with any customization of the recipe, updating identifiers, - and/or potentially flagging and quarantining images that have PHI, you should be + and/or potentially flagging and quarantining images that have PHI, you should be ready to [replace (PUT)]({{ site.baseurl}}/getting-started/dicom-put/) with new fields based on the deid recipe. diff --git a/docs/_docs/getting-started/dicom-loading.md b/docs/_docs/getting-started/dicom-loading.md index 7da66878..f95a6eb4 100644 --- a/docs/_docs/getting-started/dicom-loading.md +++ b/docs/_docs/getting-started/dicom-loading.md @@ -18,11 +18,11 @@ $ pip install deid-data ## Loading -While they are different file organizations for dicom, we are going to take a simple -approach of assuming some top level directory with some number of files within -(yes, including subdirectories). For example, if you retrieved your data using a -tool like [dcmqr](https://dcm4che.atlassian.net/wiki/display/d2/dcmqr) with a -`C-MOVE`, then you might have a flat directory structure. Sometimes the +While they are different file organizations for dicom, we are going to take a simple +approach of assuming some top level directory with some number of files within +(yes, including subdirectories). For example, if you retrieved your data using a +tool like [dcmqr](https://dcm4che.atlassian.net/wiki/display/d2/dcmqr) with a +`C-MOVE`, then you might have a flat directory structure. Sometimes the files won't have an extension (for example, being named by a `SOPInstanceUID`. ```bash @@ -37,8 +37,8 @@ deid/data/dicom-cookies/ └── image7.dcm ``` -It doesn't actually matter so much how your data is structured, -you can use any method that you like to. 
You could technically +It doesn't actually matter so much how your data is structured, +you can use any method that you like to. You could technically just use `os.listdir` or `glob`: @@ -67,7 +67,7 @@ os.listdir(base) 'image5.dcm'] ``` -Notice anything that might trigger a bug with the above? You probably +Notice anything that might trigger a bug with the above? You probably should ask for an absolute path. ```python @@ -84,15 +84,15 @@ for root, folders, files in os.walk(base): ``` We provide a few more robust functions to find datasets, because it's usually the case that you want - to match a pattern of file, have subfolders, or want a validation + to match a pattern of file, have subfolders, or want a validation done to be sure that each file is dicom. ## Find Datasets -The function that we have provided will find all datasets matching some pattern -(or all files recursively in a folder). You simply need to provide a list of top folders, -a list of files and folders, or just files to start. For the purposes of this +The function that we have provided will find all datasets matching some pattern +(or all files recursively in a folder). You simply need to provide a list of top folders, +a list of files and folders, or just files to start. For the purposes of this walkthrough, we will load data folders that are provided with the application. ```python @@ -103,7 +103,7 @@ base '/home/vanessa/anaconda3/lib/python3.5/site-packages/som-0.1.1-py3.5.egg/som/data/dicom-cookies' ``` -In the above, all we've done it retrieved the full path for a +In the above, all we've done is retrieve the full path for a folder of dicom files. Let's try to read in the data: @@ -116,7 +116,7 @@ DEBUG Checking 7 dicom files for validation. Found 7 valid dicom files ``` -We can also specify to not do the check, if we are absolutely sure. +We can also specify to not do the check, if we are absolutely sure. For larger datasets this might speed up processing a little bit.
```python @@ -124,7 +124,7 @@ dicom_files = list(get_files(base,check=False)) DEBUG Found 7 contender files in dicom-cookies ``` -We can also give it a particular pattern to match. Since these files all end with +We can also give it a particular pattern to match. Since these files all end with `.dcm`, that's not so useful. Let's give a pattern to just match `image1.dcm`: @@ -135,5 +135,5 @@ DEBUG Checking 1 dicom files for validation. Found 1 valid dicom files ``` -At this point, you should have a list of dicom files. You might now want +At this point, you should have a list of dicom files. You might now want to [configure]({{ site.baseurl }}/getting-started/dicom-config) your deidentifation. diff --git a/docs/_docs/getting-started/dicom-pixels.md b/docs/_docs/getting-started/dicom-pixels.md index e4ae0250..60ac8a85 100644 --- a/docs/_docs/getting-started/dicom-pixels.md +++ b/docs/_docs/getting-started/dicom-pixels.md @@ -4,10 +4,10 @@ category: Getting Started order: 6 --- -At this point, you've possibly obtained identifiers via a [get]({{ site.baseurl }}/getting-started/dicom-get) -action, and you want to figure out which of your images have pixels burned -into the data. If you don't want the detalis, jump into our -[example script](https://github.com/pydicom/deid/blob/master/examples/dicom/pixels/run-cleaner-client.py). +At this point, you've possibly obtained identifiers via a [get]({{ site.baseurl }}/getting-started/dicom-get) +action, and you want to figure out which of your images have pixels burned +into the data. If you don't want the detalis, jump into our +[example script](https://github.com/pydicom/deid/blob/master/examples/dicom/pixels/run-cleaner-client.py). Here we will walk through how this cleaner was derived, and how it works. - [Data](#data) @@ -32,17 +32,17 @@ $ pip install deid-data ## Inspiration from CTP -Flagging images with potentially having burned in PHI is based on a well established -rule-based approach. 
We know a concrete list of header fields and known locations +Flagging images with potentially having burned in PHI is based on a well established +rule-based approach. We know a concrete list of header fields and known locations with PHI associated with fields in the header, and we can check these fields in any -files and then perform cleaning if there is a match. This approach is based on the -MIRCTP functions to [filter DICOM](http://mircwiki.rsna.org/index.php?title=The_CTP_DICOM_Filter) -and then [Anonymize](http://mircwiki.rsna.org/index.php?title=The_CTP_DICOM_Pixel_Anonymizer). -The [DicomPixelAnonymizer.script](https://github.com/johnperry/CTP/blob/master/source/files/scripts/DicomPixelAnonymizer.script) -is a rule based list of known machine and modality types, and specific locations -in the pixels where annotations are commonly found. The -[BurnedInPixels.script](https://github.com/johnperry/CTP/blob/master/source/files/scripts/BurnedInPixelsFilter.script) -is a set of filters that, given that an image passes through them, it continues processing. +files and then perform cleaning if there is a match. This approach is based on the +MIRCTP functions to [filter DICOM](http://mircwiki.rsna.org/index.php?title=The_CTP_DICOM_Filter) +and then [Anonymize](http://mircwiki.rsna.org/index.php?title=The_CTP_DICOM_Pixel_Anonymizer). +The [DicomPixelAnonymizer.script](https://github.com/johnperry/CTP/blob/master/source/files/scripts/DicomPixelAnonymizer.script) +is a rule based list of known machine and modality types, and specific locations +in the pixels where annotations are commonly found. The +[BurnedInPixels.script](https://github.com/johnperry/CTP/blob/master/source/files/scripts/BurnedInPixelsFilter.script) +is a set of filters that, given that an image passes through them, it continues processing. If it fails, then we flag it. If we look at the script above, we see the following: ``` @@ -56,35 +56,35 @@ If it fails, then we flag it. 
If we look at the script above, we see the followi # ![0028,0301].contains("YES") BurnedInAnnotation is not YES ``` -and I've provided a "human friendly" translation of the rules. The `!` operator indicates a `not`, -and the `*` indicates `and`. You can imagine an image passing through those tests, and if it -makes it all the way through, it's considered ok. If any of the tests fail, then it -gets flagged for PHI (Burned Annotations) and is quarantined. Thus, we can read +and I've provided a "human friendly" translation of the rules. The `!` operator indicates a `not`, +and the `*` indicates `and`. You can imagine an image passing through those tests, and if it +makes it all the way through, it's considered ok. If any of the tests fail, then it +gets flagged for PHI (Burned Annotations) and is quarantined. Thus, we can read through the dicom fields and summarize the above as: We continue processing given that: - Image was not saved with some secondary software or device - Image is not flagged to have burned pixels -If we look at the [DicomPixelAnonymizer.script](https://github.com/johnperry/CTP/blob/master/source/files/scripts/DicomPixelAnonymizer.script), -it also contains criteria (and additionally, locations) for pixel areas that are known/likely +If we look at the [DicomPixelAnonymizer.script](https://github.com/johnperry/CTP/blob/master/source/files/scripts/DicomPixelAnonymizer.script), +it also contains criteria (and additionally, locations) for pixel areas that are known/likely to have annotations. The general format looks like this: ```console { signature } (region) (region) ... 
(region) ``` -and the signature looks similar to an expression used in the `BurnedInPixels.script`, +and the signature looks similar to an expression used in the `BurnedInPixels.script`, but the difference is that groups of logic are then paired with one or more regions: ```console -{ Modality.equals("CT") - * Manufacturer.containsIgnoreCase("manufacturer1") +{ Modality.equals("CT") + * Manufacturer.containsIgnoreCase("manufacturer1") * ManufacturerModelName.containsIgnoreCase("modelA") } (0,0,100,20) (480,200,32,250) ``` -The expression above would say: +The expression above would say: The pixels with bounding boxes (0,0,100,20) and (480,200,32,250) should be removed if: - the modality is CT AND @@ -92,32 +92,32 @@ The pixels with bounding boxes (0,0,100,20) and (480,200,32,250) should be remov - the Manufacturer model name text contains "modelA" (and ignore the case) -I'm not entirely sure why these two are separate (as both seem to indicate a flag -for an image having PHI) but likely it's because the first group (`BurnedInPixels.script`) -indicates header fields that are likely to indicate annotation, but don't -carry any obvious mapping to a location. We can think of both as a set of filters, -some with a clear location, and others not. TLDR: the second file (`DicomPixelAnonymizer.script`) +I'm not entirely sure why these two are separate (as both seem to indicate a flag +for an image having PHI) but likely it's because the first group (`BurnedInPixels.script`) +indicates header fields that are likely to indicate annotation, but don't +carry any obvious mapping to a location. We can think of both as a set of filters, +some with a clear location, and others not. TLDR: the second file (`DicomPixelAnonymizer.script`) has both header fields and locations. 
## Deid Implementation -We have a set of pixel functions that mirror the functionality of MIRCTP, -and we take a similar approach of deriving the rules for this process from a +We have a set of pixel functions that mirror the functionality of MIRCTP, +and we take a similar approach of deriving the rules for this process from a deid recipe. Our implementeation of a [DicomCleaner](https://github.com/pydicom/deid/blob/master/deid/dicom/pixels/clean.py#L35) generally works as follows: 1. The user initializes a [Recipe](recipe.md) to configure detecting images with PHI (and possibly cleaning). The recipe has two parts - a set of filters to run over the headers to estimate if an image has burned in pixels (a section that starts with `%filter`), and a list of header cleaning rules (`%header`). 2. The recipe is used to categorize the images into groups based on the defined lists, or to clean the data. 3. The user selects some subset of images to continue forward with replacement of identifiers. -To jump right in to using the Dicom Cleaner, see our [example script](https://github.com/pydicom/deid/blob/master/examples/dicom/pixels/run-cleaner-client.py). +To jump right in to using the Dicom Cleaner, see our [example script](https://github.com/pydicom/deid/blob/master/examples/dicom/pixels/run-cleaner-client.py). We will walk through the basics here. 
We start by importing the class ```python -from deid.dicom import DicomCleaner, get_files +from deid.dicom import DicomCleaner, get_files from deid.data import get_dataset ``` @@ -153,7 +153,7 @@ Deid has two ways of representing coordinates: - The [ctp standard](https://mircwiki.rsna.org/index.php?title=The_CTP_DICOM_Pixel_Anonymizer) with `ctpcoordinate` or `ctpkeepcoordinate` - Our coordinate standard (xmin, ymin, xmax, ymax) with `coordinate` or `keepcoordinate` - + By default, we use a list of rules provided by CTP and other users in [dicom.deid](https://github.com/pydicom/deid/blob/master/deid/data/deid.dicom), and these are based on finding known locations based on dicom header values. With and without the `ctp` prefix to determine the coordinate convention used, there are two operations we can apply to coordinates: @@ -183,12 +183,12 @@ but instead of `coordinates` you would have: ``` In that the default mask is 1s (to indicate keep) this would only be meaningful if you've already -provided a directive to clean some area including that region. +provided a directive to clean some area including that region. #### Custom Clean Let's say that you want to perform a cleaning action, but you don't have corresponding header fields -to indicate it. In fact, you want to go further and extract the coordinates from a field in the image. +to indicate it. In fact, you want to go further and extract the coordinates from a field in the image. In this case you can use a smiliar snippet. In the example below, we take the coordinates defined based on the [SequenceOfUltrasoundRegions](http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.8.5.5.html#table_C.8-17) identifier, and tell deid to keep that region. 
@@ -198,7 +198,7 @@ LABEL Clean Ultrasound keepcoordinates from:SequenceOfUltrasoundRegions ``` -And since the default value of the mask is all 1s, we need to start with the inverse, +And since the default value of the mask is all 1s, we need to start with the inverse, all zeros! We can do that as follows: ```console @@ -213,7 +213,7 @@ LABEL Clean Ultrasound In the above, we first tell deid to blank the entire mask (setting values of 0). We then ask to look for the dicom header `SequenceOfUltrasoundRegions` (it must be present), and given this condition, we look for coordinates -from that field, and set then to a value of 1 (keep) in our mask. +from that field, and set them to a value of 1 (keep) in our mask. These actions is added to the provided deid.dicom.ultrasound recipe, a subset shown below: @@ -264,7 +264,7 @@ and then run clean to perform the actions. client.clean() import os -cleaner.save_dicom(output_folder=os.getcwd()) +cleaner.save_dicom(output_folder=os.getcwd()) '/home/vanessa/Desktop/Code/deid/echo/cleaned-echo1.dcm' ``` @@ -291,9 +291,9 @@ client.detect(dicom_file) ### Clean and Save -After detection, the flags that were triggered are saved with the client, until -you override with another file. You can now run clean, and save the -images to a format that you like. Remember that even with flags, if there are no coordinates +After detection, the flags that were triggered are saved with the client, until +you override with another file. You can now run clean, and save the +images to a format that you like. Remember that even with flags, if there are no coordinates associated with the flag, no changes are done to the image. ```python @@ -330,15 +330,15 @@ Generating animation...
### Debugging and Important Notes -In a recent pull request we [encountered](https://github.com/pydicom/deid/pull/134) +In a recent pull request we [encountered](https://github.com/pydicom/deid/pull/134) an issue where a user had decompressed the data without changing the `dicom.PixelInterpretation`, which is a header that tells pydicom how to read the data. The suggested approach -when you do `dicom.decompress()` is to set `dicom.PhotometricInterpreation = 'RGB'` +when you do `dicom.decompress()` is to set `dicom.PhotometricInterpretation = 'RGB'` after doing so: ```python dicom.decompress() -dicom.PhotometricInterpreation = 'RGB' +dicom.PhotometricInterpretation = 'RGB' ``` If you see this warning message: @@ -356,7 +356,7 @@ client.clean(fix_interpretation=False) ``` Please [see the note](https://pydicom.github.io/pydicom/stable/old/image_data_handlers.html#usage) -on the pydicom documentation for more details. Also, it would be useful to use machine +on the pydicom documentation for more details. Also, it would be useful to use machine learning to detect text. if you want to develop this or have ideas, please reach out. @@ -375,7 +375,7 @@ dicom_file_data = pydicom.read_file(DICOM_FILE) burned_pixels_results = has_burned_pixels(dicom_file_data) cleaned_pixels = clean_pixel_data( - dicom_file=dicom_file_data, + dicom_file=dicom_file_data, results=burned_pixels_results ) diff --git a/docs/_docs/getting-started/dicom-put.md b/docs/_docs/getting-started/dicom-put.md index 309119bd..5f5058f3 100644 --- a/docs/_docs/getting-started/dicom-put.md +++ b/docs/_docs/getting-started/dicom-put.md @@ -13,7 +13,7 @@ To run these examples, you'll need to install external deid-data. $ pip install deid-data ``` -At this point, we have a bunch of dicom files, have written a recipe with +At this point, we have a bunch of dicom files, have written a recipe with actions, and want to run those actions across the files. 
The easiest way to do this is with the `DicomParser` @@ -38,7 +38,7 @@ import os path = os.path.abspath("%s/../examples/deid/deid.dicom-groups" % get_installdir()) ``` -Let's now import the DicomParser and +Let's now import the DicomParser and ```python from deid.dicom.parser import DicomParser @@ -52,7 +52,7 @@ You can see that the dicom is loaded: ```python parser.dicom -Out[32]: +Out[32]: (0008, 0005) Specific Character Set CS: 'ISO_IR 100' (0008, 0016) SOP Class UID UI: Secondary Capture Image Storage (0008, 0018) SOP Instance UID UI: 1.2.276.0.7230010.3.1.4.8323329.5329.1495927169.580351 @@ -174,7 +174,7 @@ You would do the same thing for a named function. Where do these end up? In a l held by the parser: ```python -parser.lookup +parser.lookup {'id': 'new-cookie-id', 'source_id': 'new-operator-id'} ``` @@ -183,7 +183,7 @@ So they will be available when you parse. ### 4. Parse Away! Now that we've defined the variables that we need, and we've loaded our recipe -and dicom, let's perform the parse action! By default, sequences and private +and dicom, let's perform the parse action! By default, sequences and private tags are not removed (so they are included in parsing). ```python @@ -287,7 +287,7 @@ And you could save your data to file. ```python parser.save("/tmp/mydicom.dcm") ``` - + ## Replace Identifiers If you want to do the above in bulk, you might find it easier to use the `replace_identifiers` @@ -321,7 +321,7 @@ cleaned_files = replace_identifiers(dicom_files=dicom_files, save=True) You will notice that by default, the files are written to a temporary directory: ```python -cleaned_files +cleaned_files ['/tmp/tmphvj05c6y/image4.dcm', '/tmp/tmphvj05c6y/image2.dcm', '/tmp/tmphvj05c6y/image7.dcm', @@ -347,8 +347,8 @@ cleaned_files '/home/vanessa/Desktop/image5.dcm'] ``` -One setting that is important is `overwrite`, which is by default set to False. 
-For example, let's say we decided to run the above again, using the same output +One setting that is important is `overwrite`, which is by default set to False. +For example, let's say we decided to run the above again, using the same output directory of desktop (where the files already exist!) ```python @@ -375,7 +375,7 @@ DEBUG item id: 1.2.276.0.7230010.3.1.4.8323329.5329.1495927169.580351 ERROR image5.dcm already exists, overwrite set to False. Not writing. ``` -The function gets angry at us, and returns the list of files that are already +The function gets angry at us, and returns the list of files that are already there. If you really want to force an overwrite, then you need to do this: @@ -385,14 +385,14 @@ cleaned_files = replace_identifiers(dicom_files=dicom_files, overwrite=True) ``` -wherever you dump your new dicoms, it's up to you to decide how to then move +wherever you dump your new dicoms, it's up to you to decide how to then move and store them, and (likely) deal with the original data with identifiers. ## Private Tags An important note is that by default, this function will keep private tags - (`remove_private=False`). If you need to remove private tags + (`remove_private=False`). If you need to remove private tags you would want to set this to True. @@ -417,7 +417,7 @@ You could also do pixel scraping first, and then call the function ### Getting Private Tags -If you are working within python and want to get private tags for inspection, +If you are working within python and want to get private tags for inspection, you can do that too! Let's first load some default data: diff --git a/docs/_docs/getting-started/index.md b/docs/_docs/getting-started/index.md index 1fa47dc8..7d7971c8 100644 --- a/docs/_docs/getting-started/index.md +++ b/docs/_docs/getting-started/index.md @@ -7,7 +7,7 @@ order: 1 Deid does two things: clean header and image data, and filter based on headers. 
These algorithms are not sophisticated - they perform their duties based on -parsing header metadata. Here we will provide a simple walkthrough to get started +parsing header metadata. Here we will provide a simple walkthrough to get started with deid. In the following pages, we will show you how to load data, configure a custom recipe to deidentify and filter, and then clean pixels. diff --git a/docs/_docs/install/docker.md b/docs/_docs/install/docker.md index 20658ee6..cf9f1bc0 100644 --- a/docs/_docs/install/docker.md +++ b/docs/_docs/install/docker.md @@ -7,10 +7,10 @@ order: 2 To use the Docker container, you should first ensure that you have [installed Docker](https://www.docker.com/get-started) on your computer. -For the container we will use, we currently provide a container hosted -at [pydicom/deid](http://hub.docker.com/r/pydicom/deid) that you can use to +For the container we will use, we currently provide a container hosted +at [pydicom/deid](http://hub.docker.com/r/pydicom/deid) that you can use to quickly run deid without any installation of other dependencies -or compiling on your host. +or compiling on your host. When you are ready, try running {{ site.title }} using it. This first command will access the deid executable: @@ -28,6 +28,6 @@ It might also be desired to shell into the container and interact with deid via python: ```bash -$ docker run -it --entrypoint bash {{ site.docker }} +$ docker run -it --entrypoint bash {{ site.docker }} (base) root@488f5e7f53a1:/code# ``` diff --git a/docs/_docs/install/local.md b/docs/_docs/install/local.md index c9140982..f156fdd0 100644 --- a/docs/_docs/install/local.md +++ b/docs/_docs/install/local.md @@ -5,7 +5,7 @@ order: 3 --- -Let's walk through how to install {{ site.title }} locally. +Let's walk through how to install {{ site.title }} locally. 
## Install from Github diff --git a/docs/_docs/user-docs/client.md b/docs/_docs/user-docs/client.md index 4e997270..5b647762 100644 --- a/docs/_docs/user-docs/client.md +++ b/docs/_docs/user-docs/client.md @@ -11,10 +11,10 @@ $ which deid /home/vanessa/anaconda3/bin/deid ``` -**Note** @vsoch thinks this client could be better organized (with regard to +**Note** @vsoch thinks this client could be better organized (with regard to usage and commands) please [provide feedback] -(https://www.github.com/pydicom/deid/issues) as you test these functions! -The primary use of deid by the developers group has +(https://www.github.com/pydicom/deid/issues) as you test these functions! +The primary use of deid by the developers group has been via functions in Python, so the client might be neglected. @@ -29,20 +29,20 @@ usage: deid [-h] [--input FOLDER] [--version] [--print] [--format {dicom}] deid: error: the following arguments are required: --action/-a ``` -It's telling us that it wants an action, which can be one of `{get,put,all}`, -where "get" corresponds to getting identifiers from a dataset, "put" corresponds -to doing the replacement, and "all" means you want to do both at the same time -(meaning you won't intervene between the calls to customize any of the replacement -actions. Let's walk through the simplest use case, giving an action without -any other arguments, which will use the default dataset provided (a subset +It's telling us that it wants an action, which can be one of `{get,put,all}`, +where "get" corresponds to getting identifiers from a dataset, "put" corresponds +to doing the replacement, and "all" means you want to do both at the same time +(meaning you won't intervene between the calls to customize any of the replacement +actions. Let's walk through the simplest use case, giving an action without +any other arguments, which will use the default dataset provided (a subset of [dicom-cookies](https://pydicom.github.io/dicom-cookies)). 
### Inspect -Currently, inspect is simply going to look at header fields and try to guess -if there are burned pixels in the image. I am not convinced this is robust - -the filters I am using are from [MIRC CTP](https://github.com/johnperry/CTP/blob/master/source/files/scripts/BurnedInPixelsFilter.script), +Currently, inspect is simply going to look at header fields and try to guess +if there are burned pixels in the image. I am not convinced this is robust - +the filters I am using are from [MIRC CTP](https://github.com/johnperry/CTP/blob/master/source/files/scripts/BurnedInPixelsFilter.script), and seem to generally look for: - if the field Burned Annotation is set to Yes @@ -52,7 +52,7 @@ and seem to generally look for: To inspect a dataset, call the `--action` (or `-a`) command with `inspect`: ```bash -deid --action inspect +deid --action inspect No input folder specified, will use demo dicom-cookies. DEBUG Found 7 contender files in dicom-cookies DEBUG Checking 7 dicom files for validation. @@ -70,7 +70,7 @@ or specify your own dataset with `--input/-i` ``` deid --action inspect -input /home/vanessa/Desktop/test/su/ -DEBUG Found 62 contender files in +DEBUG Found 62 contender files in DEBUG Checking 62 dicom files for validation. WARNING Cannot read input file /home/vanessa/Desktop/test/su/__index.xml, skipping. Found 61 valid dicom files @@ -88,8 +88,8 @@ DEBUG FO-3565568840462998171.dcm header filter indicates pixels are clean. ``` ### Get -Let's specify `--action` as get. This means that we will use a demo dataset, -and the ids (a data structure saved in compressed python file called a "pickle") +Let's specify `--action` as get. This means that we will use a demo dataset, +and the ids (a data structure saved in compressed python file called a "pickle") will be saved to a temporary directory. 
``` @@ -123,15 +123,15 @@ DEBUG Found 27 defined fields for image5.dcm Writing ids to /tmp/tmpv3h9b11t/deid-ids.pkl ``` -Pickle was chosen because what appear as strings are actually data structures -that write nicely back into dicom (or other) files. It also is likely the case -that to save and tweak these identifiers, you will likely need to load them -programmatically anyway, and we are doing a good deed for the world to -encourage using Python :). +Pickle was chosen because what appear as strings are actually data structures +that write nicely back into dicom (or other) files. It also is likely the case +that to save and tweak these identifiers, you will likely need to load them +programmatically anyway, and we are doing a good deed for the world to +encourage using Python :). #### Customize Message Level -Also by default, we give you debug output. If you want to silence the output, +Also by default, we give you debug output. If you want to silence the output, then you can add `--quiet`: ``` @@ -143,8 +143,8 @@ GET and PUT identifiers from dicom-cookies Writing ids to /tmp/tmp6sywao9a/deid-ids.pkl ``` -Note that you are actually receiving the level `INFO`, because otherwise you might -not know where the file was saved. If you really want to tweak your level, +Note that you are actually receiving the level `INFO`, because otherwise you might +not know where the file was saved. If you really want to tweak your level, then just export what you like in an environment variable, `MESSAGELEVEL`: ```bash @@ -153,11 +153,11 @@ export MESSAGELEVEL deid --action get ``` -And nothing would be printed! +And nothing would be printed! #### Customize Output -If you just want to check output, it might be useful to print it to the screen. +If you just want to check output, it might be useful to print it to the screen. 
You can do this by adding the flag `--print`: @@ -165,7 +165,7 @@ You can do this by adding the flag `--print`: $ deid --action get --print ``` -You will see a WHOLE bunch of output print to the screen! You could pipe this +You will see a WHOLE bunch of output print to the screen! You could pipe this output into a file, however be careful that this will not be proper json. ``` @@ -174,8 +174,8 @@ $ cat deid-ids.txt | more ``` ### Put -Put works in the same way, except you would also hand it your ids (the pickle) -file, in the case that you don't call get with put (via all). In case you changed +Put works in the same way, except you would also hand it your ids (the pickle) +file, in the case that you don't call get with put (via all). In case you changed your message level to `QUIET`, change it back! ```bash @@ -301,12 +301,12 @@ DEBUG item id: 1.2.276.0.7230010.3.1.4.8323329.5329.1495927169.580351 7 dicom files at /home/vanessa/Desktop ``` -and no error message occurs. +and no error message occurs. #### Customize Deid Recipe -If you generate a configuration file (deid) that says how you want to deidentify -your data, then you can give that to get. Here is a simple one, discussed in +If you generate a configuration file (deid) that says how you want to deidentify +your data, then you can give that to get. Here is a simple one, discussed in [config](config.md) and [available here](../examples/deid/deid.dicom) for our dicom cookies: ```bash @@ -388,11 +388,11 @@ image2.dcm image4.dcm image6.dcm ``` -The reason because we get a lot of warnings is because I specified to replace -fields in the data with variables in the ids data structure, but I didn't -actually add them. In practice, this would mean they would be removed from the header. -We would have needed to load the pickle, add the identifiers, and then give the -ids datastructure to put. 
Let's quickly see what that would look like +The reason we get a lot of warnings is that I specified to replace +fields in the data with variables in the ids data structure, but I didn't +actually add them. In practice, this would mean they would be removed from the header. +We would have needed to load the pickle, add the identifiers, and then give the +ids data structure to put. Let's quickly see what that would look like (in python). First, load the identifiers we generated: ```python @@ -407,8 +407,8 @@ ids = load_identifiers(idspkl) Loading /tmp/tmp3g0x8ts2/deid-ids.pkl ``` -Now, we need to define an "id" and "source_id" to substitute, here is a loop -to do that. At this point you would probably want to save whatever you need to +Now, we need to define an "id" and "source_id" to substitute, here is a loop +to do that. At this point you would probably want to save whatever you need to your IRB approved database / protocol. ```python @@ -427,7 +427,7 @@ ids = save_identifiers(ids) exit ``` -Now let's try again - since the fields are defined in the data, we shouldn't see +Now let's try again - since the fields are defined in the data, we shouldn't see the warning messages. ```python @@ -525,8 +525,8 @@ DEBUG item id: 1.2.276.0.7230010.3.1.4.8323329.5329.1495927169.580351 7 dicom files at /tmp/tmp12lwhq7x ``` -This will mean that the majority of things will be removed. You can still specify a -deid file to have additions, or blanks, but all variables must be present in the +This will mean that the majority of things will be removed. You can still specify a +deid file to have additions, or blanks, but all variables must be present in the header already (eg, the fields returned in the ids that we had tweaked above) for it to work.
diff --git a/docs/_docs/user-docs/index.md b/docs/_docs/user-docs/index.md index d551b4a3..cde44ec6 100644 --- a/docs/_docs/user-docs/index.md +++ b/docs/_docs/user-docs/index.md @@ -6,7 +6,7 @@ order: 1 --- Along with the [getting started]({{ site.baseurl }}/getting-started/) guides, -these pages will help you to use the deid software. +these pages will help you to use the deid software. ## Recipes diff --git a/docs/_docs/user-docs/recipe-filters.md b/docs/_docs/user-docs/recipe-filters.md index cf90e58b..21acc5d9 100644 --- a/docs/_docs/user-docs/recipe-filters.md +++ b/docs/_docs/user-docs/recipe-filters.md @@ -4,8 +4,8 @@ category: User Documentation order: 2 --- -As you recall from the [configuration]({{ site.baseurl }}/getting-started/dicom-config/) -notes page, the deid recipe allows you to configure both cleaning of pixels +As you recall from the [configuration]({{ site.baseurl }}/getting-started/dicom-config/) +notes page, the deid recipe allows you to configure both cleaning of pixels and changing header values. This document will cover the first, how to write and apply filters to clean images. @@ -14,11 +14,11 @@ and apply filters to clean images. You might want to flag images based on their header values. For example, "I have a directory of dicom images, and I want to find those with `Modality` as `CT`. You can create -one or more filter groups to do this. If you create more than one, an image is attributed -to the filter group that comes first in the deid recipe file (indicating higher priority). -For example, an image that would be flagged with a general Blacklist criteria that is first flagged with Greylist -(meaning we know how to clean it) belongs to Greylist. We check higher priority -first to be computationally more efficient, because we can stop checking the +one or more filter groups to do this. If you create more than one, an image is attributed +to the filter group that comes first in the deid recipe file (indicating higher priority). 
+For example, an image that would be flagged with a general Blacklist criteria that is first flagged with Greylist +(meaning we know how to clean it) belongs to Greylist. We check higher priority +first to be computationally more efficient, because we can stop checking the image when we hit the first criteria flag. @@ -34,7 +34,7 @@ While you are free to define your own groups and criteria, we provide a [default #### Filter Example -I'll again show you the previous example, but give more detail this time. +I'll again show you the previous example, but give more detail this time. The start of a filter might look like this: ``` @@ -67,25 +67,25 @@ empty SecondaryCaptureDeviceManufacturerModelName empty SecondaryCaptureDeviceSoftwareVersions ``` -Each section is indicated by `%filter`, and within sections, a set of criteria are defined under a `LABEL`. -The formatting of this is inspired by both [CTP](http://mircwiki.rsna.org/index.php?title=The_CTP_DICOM_Filter) +Each section is indicated by `%filter`, and within sections, a set of criteria are defined under a `LABEL`. +The formatting of this is inspired by both [CTP](http://mircwiki.rsna.org/index.php?title=The_CTP_DICOM_Filter) and my early work with [Singularity Containers](https://sylabs.io/docs/), which is based on RPM. ##### How are images filtered? -You can imagine an image starting at the top of the file, and moving down line by line. -If at any point it doesn't pass criteria, it is flagged and placed with the group, -and no further checking is done. For this purpose, the sections are ordered by -their specificity and preference. This means that, for the above, by placing -blacklist after graylist we are saying that an image that could be flagged -to be both in the blacklist and graylist will hit the graylist first. This is -logical because the graylist corresponds to a specific set of image header -criteria for which we know how to clean. 
We only resort to general blacklist +You can imagine an image starting at the top of the file, and moving down line by line. +If at any point it doesn't pass criteria, it is flagged and placed with the group, +and no further checking is done. For this purpose, the sections are ordered by +their specificity and preference. This means that, for the above, by placing +blacklist after graylist we are saying that an image that could be flagged +to be both in the blacklist and graylist will hit the graylist first. This is +logical because the graylist corresponds to a specific set of image header +criteria for which we know how to clean. We only resort to general blacklist criteria if we make it far enough and haven't been convinced that there isn't PHI. ##### How do I read a criteria? -Each filter section criteria starts with `LABEL`. this is an identifier to +Each filter section criteria starts with `LABEL`. this is an identifier to report to the user given that the flag goes off. Each criteria then has the following format: ``` @@ -98,8 +98,8 @@ LABEL Burned In Annotation contains ImageType SAVE ``` -Reads "flag the image with a Burned In Annotation, which belongs to the blacklist filter, -if the ImageType fields contains SAVE." If you want to do an "and" statement across +Reads "flag the image with a Burned In Annotation, which belongs to the blacklist filter, +if the ImageType fields contains SAVE." If you want to do an "and" statement across two fields, just use `+`: ``` @@ -128,7 +128,7 @@ empty ImageType - First check: "flag the image if SeriesDescription contains SAVE AND BurnedInAnnotation has YES" - Second check: "flag the image if ImageType is empty or DateOfSecondaryCapture is empty." -And to make it even simpler, if you want to check one field for a value a OR b, +And to make it even simpler, if you want to check one field for a value a OR b, you can use regular expressions. 
The following checks ImageType for "CT" OR "MRI" ``` @@ -138,7 +138,7 @@ equals ImageType CT|MRI which is equivalent to: ``` -equals ImageTyoe CT +equals ImageType CT || equals ImageType MRI ``` @@ -157,14 +157,14 @@ For all of the below, case does not matter. All fields are changed to lowercase - **contains** is using a regular expression search, meaning that the word can appear anywhere in the field (eg, a `contains` "save" would flag a value of "saved". - **equals** means you want to match an expression exactly. `equals` with "save" would not flag a value of "saved". - - **empty** means that the header is present in the data, but it's an empty string (eg, ""). + - **empty** means that the header is present in the data, but it's an empty string (eg, ""). - **missing** means that the header is not present in the data. - **notEquals** is the inverse of equals - **notContains** is the inverse of contains ##### How do I customize the process? -There are several things you can customize! +There are several things you can customize! - You first don't have to use the application default files. You can make a copy, customize to your liking, and provide the path to the file as an argument. If you have criteria to contribute, we encourage you to do so. - The name of the filter itself doesn't matter, you are free to use different terms than whitelist, blacklist, etc. diff --git a/docs/_docs/user-docs/recipe-funcs.md b/docs/_docs/user-docs/recipe-funcs.md index 211685b3..a034c567 100644 --- a/docs/_docs/user-docs/recipe-funcs.md +++ b/docs/_docs/user-docs/recipe-funcs.md @@ -64,7 +64,7 @@ The default uses `stable_remapping=true`, which means we use the original UUID a to be able to consistently return the same value between runs. You can disable it, however we do not recommended it (but maybe could be appropriate for your use case). -You can also optionally define a custom prefix. Note that it needs to match the +You can also optionally define a custom prefix. 
Note that it needs to match the regular expression `^(0|[1-9][0-9]*)(\\.(0|[1-9][0-9]*))*\\.$` which (in spoken terms) is a number followed by a period, another number, and ending also in a period (e.g, `1.55.`). @@ -78,7 +78,7 @@ REPLACE ReferringPhysicianName deid_func:pydicom_uuid prefix=1.55. ## A Dicom UUID A more "formal" uuid function was added that requires an organization root. Your -organization should have it's own - for example the `PYMEDPHYS_ROOT_UID` is +organization should have its own - for example the `PYMEDPHYS_ROOT_UID` is "1.2.826.0.1.3680043.10.188" so we might do: ``` @@ -105,7 +105,7 @@ This would make a final value that looks something like `patient_into-5897bd32-b ## Jitter Jitter is intended for datetime fields, and technically you can just use the `JITTER` function provided -natively in the recipe. We decided to include it here to add further customization. For example, you can provide +natively in the recipe. We decided to include it here to add further customization. For example, you can provide variables for both days and years for a more fine-tuned jitter. We also wanted to add it here because technically it is a custom action. A jitter (as a custom deid function) might look like this: diff --git a/docs/_docs/user-docs/recipe-groups.md b/docs/_docs/user-docs/recipe-groups.md index 4fec419b..5d3aeb26 100644 --- a/docs/_docs/user-docs/recipe-groups.md +++ b/docs/_docs/user-docs/recipe-groups.md @@ -77,8 +77,8 @@ as follows: REPLACE fields:patient_info func:generate_uid ``` -And this reads nicely as "Replace fields defined in patient_info to be the variable -I'm defining with the function generate_uid (which should be added to each item +And this reads nicely as "Replace fields defined in patient_info to be the variable +I'm defining with the function generate_uid (which should be added to each item after lookup).
This of course means that the actions supported for the `%fields` section includes: @@ -112,7 +112,7 @@ notice that the first line uses a new action `SPLIT`: SPLIT PatientsName splitval='^';minlength='4' ``` -This action says to start with the field `PatientsName`, split based on the `^` +This action says to start with the field `PatientsName`, split based on the `^` character, and keep results that have a length greater than or equal to 4. Let's talk about these actions in detail. Field is the same, but we also have split: @@ -147,5 +147,5 @@ REMOVE ALL values:patient_info Or you could chose some other field name, or field expander, if you want to limit the removal to some subset. -If you haven't yet, take a look at how at generate a basic [get]({{ site.baseurl }}/getting-started/dicom-get/), +If you haven't yet, take a look at how at generate a basic [get]({{ site.baseurl }}/getting-started/dicom-get/), which is will get a set of fields and values from your dicom files. diff --git a/docs/_docs/user-docs/recipe-headers.md b/docs/_docs/user-docs/recipe-headers.md index 747ce645..01132cef 100644 --- a/docs/_docs/user-docs/recipe-headers.md +++ b/docs/_docs/user-docs/recipe-headers.md @@ -196,7 +196,7 @@ def is_name(dicom, value, field): splitvalues = name.split('^') for phi in splitvalues: if len(phi) > 4 and phi in currentvalue: - return True + return True return False ``` diff --git a/docs/_docs/user-docs/recipe-labels.md b/docs/_docs/user-docs/recipe-labels.md index f76d4662..206b1237 100644 --- a/docs/_docs/user-docs/recipe-labels.md +++ b/docs/_docs/user-docs/recipe-labels.md @@ -4,8 +4,8 @@ category: User Documentation order: 3 --- -The `%labels` section is a way for the user to supply custom commands to an -application that aren't relevant to the header or pixels. For example, If I +The `%labels` section is a way for the user to supply custom commands to an +application that aren't relevant to the header or pixels. 
For example, If I wanted to carry around a version or a maintainer address, I could do that as follows: ``` @@ -21,8 +21,8 @@ ADD MAINTAINER vsochat@stanford.edu ADD VERSION 1.0 ``` -As you can see, the labels follow the same action commands as before, in the case -that the application needs them. In case you are interested in what the +As you can see, the labels follow the same action commands as before, in the case +that the application needs them. In case you are interested in what the application sees when it reads the file above (if you are a developer) it looks like this: ``` @@ -56,6 +56,6 @@ application sees when it reads the file above (if you are a developer) it looks } ``` -And you are free to map the actions (eg, `ADD`, `REMOVE`) onto whatever functionality -is relevant to your application, or just skip the action entirely and use the +And you are free to map the actions (eg, `ADD`, `REMOVE`) onto whatever functionality +is relevant to your application, or just skip the action entirely and use the fields and values. diff --git a/docs/_docs/user-docs/tags.md b/docs/_docs/user-docs/tags.md index a9baca89..e7066cad 100644 --- a/docs/_docs/user-docs/tags.md +++ b/docs/_docs/user-docs/tags.md @@ -4,7 +4,7 @@ category: User Documentation order: 7 --- -It is sometimes helpful to be able to find a particular tag. [Pydicom](https://www.github.com/pydicom/pydicom) +It is sometimes helpful to be able to find a particular tag. [Pydicom](https://www.github.com/pydicom/pydicom) has done a great job of providing a dictionary of tags: ```python @@ -13,7 +13,7 @@ from pydicom._dicom_dict import DicomDictionary ## Search By Name -and we extend that here to make it easy to find tags. For example, +and we extend that here to make it easy to find tags. 
For example, we can use a function to search based on name: ```python @@ -36,9 +36,9 @@ find_tag('Modality', VR='CS') ## Search Repeaters (Retired) -If you want to search the set of Repeats (or tags I think pydicom doesn't use / calls retired). -Then set `retired=True`. For example, when I search for `Overlay Description` the normal -way I get nothing, but setting this flag returns the (old) value. This would be useful +If you want to search the set of Repeats (or tags I think pydicom doesn't use / calls retired). +Then set `retired=True`. For example, when I search for `Overlay Description` the normal +way I get nothing, but setting this flag returns the (old) value. This would be useful given that you need to look up a tag for an older dataset. ```python diff --git a/docs/api_docs/conf.py b/docs/api_docs/conf.py index f9799012..53f974f7 100644 --- a/docs/api_docs/conf.py +++ b/docs/api_docs/conf.py @@ -13,8 +13,9 @@ # All configuration values have a default; values that are commented out # serve to show the default. 
-import sys import os +import sys + from recommonmark.parser import CommonMarkParser source_parsers = {".md": CommonMarkParser} @@ -61,7 +62,7 @@ project = "deid" copyright = "2017-2022, Vanessa Sochat" -from deid import version +from deid import version # noqa # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/docs/api_docs/index.rst b/docs/api_docs/index.rst index 8a0fbc82..52d3e955 100644 --- a/docs/api_docs/index.rst +++ b/docs/api_docs/index.rst @@ -38,7 +38,7 @@ Resources :caption: API Reference :name: api-reference :maxdepth: 3 - + source/deid.config.rst source/deid.data.rst source/deid.dicom.actions.rst diff --git a/docs/assets/css/deid.css b/docs/assets/css/deid.css index c078a326..54a7097a 100644 --- a/docs/assets/css/deid.css +++ b/docs/assets/css/deid.css @@ -245,8 +245,8 @@ samp { transform:rotate(360deg); } } - -#name { + +#name { font-size: 46px; display: inline-block; vertical-align: middle; @@ -254,7 +254,7 @@ samp { text-decoration: none !important; font-weight:400; } - + #box { width: 450px; border: solid black 2px; text-align: justify; diff --git a/docs/assets/img/emblem.svg b/docs/assets/img/emblem.svg index 4c372409..98af8933 100644 --- a/docs/assets/img/emblem.svg +++ b/docs/assets/img/emblem.svg @@ -1,4 +1,4 @@ - \ No newline at end of file + diff --git a/docs/assets/js/lunr.min.js b/docs/assets/js/lunr.min.js index 884d1f2a..b0198dff 100644 --- a/docs/assets/js/lunr.min.js +++ b/docs/assets/js/lunr.min.js @@ -4,4 +4,4 @@ * MIT Licensed * @license */ -!function(){var t=function(e){var n=new t.Index;return n.pipeline.add(t.trimmer,t.stopWordFilter,t.stemmer),e&&e.call(n,n),n};t.version="0.7.0",t.utils={},t.utils.warn=function(t){return function(e){t.console&&console.warn&&console.warn(e)}}(this),t.utils.asString=function(t){return void 
0===t||null===t?"":t.toString()},t.EventEmitter=function(){this.events={}},t.EventEmitter.prototype.addListener=function(){var t=Array.prototype.slice.call(arguments),e=t.pop(),n=t;if("function"!=typeof e)throw new TypeError("last argument must be a function");n.forEach(function(t){this.hasHandler(t)||(this.events[t]=[]),this.events[t].push(e)},this)},t.EventEmitter.prototype.removeListener=function(t,e){if(this.hasHandler(t)){var n=this.events[t].indexOf(e);this.events[t].splice(n,1),this.events[t].length||delete this.events[t]}},t.EventEmitter.prototype.emit=function(t){if(this.hasHandler(t)){var e=Array.prototype.slice.call(arguments,1);this.events[t].forEach(function(t){t.apply(void 0,e)})}},t.EventEmitter.prototype.hasHandler=function(t){return t in this.events},t.tokenizer=function(e){return arguments.length&&null!=e&&void 0!=e?Array.isArray(e)?e.map(function(e){return t.utils.asString(e).toLowerCase()}):e.toString().trim().toLowerCase().split(t.tokenizer.seperator):[]},t.tokenizer.seperator=/[\s\-]+/,t.tokenizer.load=function(t){var e=this.registeredFunctions[t];if(!e)throw new Error("Cannot load un-registered function: "+t);return e},t.tokenizer.label="default",t.tokenizer.registeredFunctions={"default":t.tokenizer},t.tokenizer.registerFunction=function(e,n){n in this.registeredFunctions&&t.utils.warn("Overwriting existing tokenizer: "+n),e.label=n,this.registeredFunctions[n]=e},t.Pipeline=function(){this._stack=[]},t.Pipeline.registeredFunctions={},t.Pipeline.registerFunction=function(e,n){n in this.registeredFunctions&&t.utils.warn("Overwriting existing registered function: "+n),e.label=n,t.Pipeline.registeredFunctions[e.label]=e},t.Pipeline.warnIfFunctionNotRegistered=function(e){var n=e.label&&e.label in this.registeredFunctions;n||t.utils.warn("Function is not registered with pipeline. 
This may cause problems when serialising the index.\n",e)},t.Pipeline.load=function(e){var n=new t.Pipeline;return e.forEach(function(e){var i=t.Pipeline.registeredFunctions[e];if(!i)throw new Error("Cannot load un-registered function: "+e);n.add(i)}),n},t.Pipeline.prototype.add=function(){var e=Array.prototype.slice.call(arguments);e.forEach(function(e){t.Pipeline.warnIfFunctionNotRegistered(e),this._stack.push(e)},this)},t.Pipeline.prototype.after=function(e,n){t.Pipeline.warnIfFunctionNotRegistered(n);var i=this._stack.indexOf(e);if(-1==i)throw new Error("Cannot find existingFn");i+=1,this._stack.splice(i,0,n)},t.Pipeline.prototype.before=function(e,n){t.Pipeline.warnIfFunctionNotRegistered(n);var i=this._stack.indexOf(e);if(-1==i)throw new Error("Cannot find existingFn");this._stack.splice(i,0,n)},t.Pipeline.prototype.remove=function(t){var e=this._stack.indexOf(t);-1!=e&&this._stack.splice(e,1)},t.Pipeline.prototype.run=function(t){for(var e=[],n=t.length,i=this._stack.length,r=0;n>r;r++){for(var o=t[r],s=0;i>s&&(o=this._stack[s](o,r,t),void 0!==o&&""!==o);s++);void 0!==o&&""!==o&&e.push(o)}return e},t.Pipeline.prototype.reset=function(){this._stack=[]},t.Pipeline.prototype.toJSON=function(){return this._stack.map(function(e){return t.Pipeline.warnIfFunctionNotRegistered(e),e.label})},t.Vector=function(){this._magnitude=null,this.list=void 0,this.length=0},t.Vector.Node=function(t,e,n){this.idx=t,this.val=e,this.next=n},t.Vector.prototype.insert=function(e,n){this._magnitude=void 0;var i=this.list;if(!i)return this.list=new t.Vector.Node(e,n,i),this.length++;if(en.idx?n=n.next:(i+=e.val*n.val,e=e.next,n=n.next);return i},t.Vector.prototype.similarity=function(t){return this.dot(t)/(this.magnitude()*t.magnitude())},t.SortedSet=function(){this.length=0,this.elements=[]},t.SortedSet.load=function(t){var e=new this;return e.elements=t,e.length=t.length,e},t.SortedSet.prototype.add=function(){var t,e;for(t=0;t1;){if(o===t)return 
r;t>o&&(e=r),o>t&&(n=r),i=n-e,r=e+Math.floor(i/2),o=this.elements[r]}return o===t?r:-1},t.SortedSet.prototype.locationFor=function(t){for(var e=0,n=this.elements.length,i=n-e,r=e+Math.floor(i/2),o=this.elements[r];i>1;)t>o&&(e=r),o>t&&(n=r),i=n-e,r=e+Math.floor(i/2),o=this.elements[r];return o>t?r:t>o?r+1:void 0},t.SortedSet.prototype.intersect=function(e){for(var n=new t.SortedSet,i=0,r=0,o=this.length,s=e.length,a=this.elements,h=e.elements;;){if(i>o-1||r>s-1)break;a[i]!==h[r]?a[i]h[r]&&r++:(n.add(a[i]),i++,r++)}return n},t.SortedSet.prototype.clone=function(){var e=new t.SortedSet;return e.elements=this.toArray(),e.length=e.elements.length,e},t.SortedSet.prototype.union=function(t){var e,n,i;this.length>=t.length?(e=this,n=t):(e=t,n=this),i=e.clone();for(var r=0,o=n.toArray();rp;p++)c[p]===a&&d++;h+=d/f*l.boost}}this.tokenStore.add(a,{ref:o,tf:h})}n&&this.eventEmitter.emit("add",e,this)},t.Index.prototype.remove=function(t,e){var n=t[this._ref],e=void 0===e?!0:e;if(this.documentStore.has(n)){var i=this.documentStore.get(n);this.documentStore.remove(n),i.forEach(function(t){this.tokenStore.remove(t,n)},this),e&&this.eventEmitter.emit("remove",t,this)}},t.Index.prototype.update=function(t,e){var e=void 0===e?!0:e;this.remove(t,!1),this.add(t,!1),e&&this.eventEmitter.emit("update",t,this)},t.Index.prototype.idf=function(t){var e="@"+t;if(Object.prototype.hasOwnProperty.call(this._idfCache,e))return this._idfCache[e];var n=this.tokenStore.count(t),i=1;return n>0&&(i=1+Math.log(this.documentStore.length/n)),this._idfCache[e]=i},t.Index.prototype.search=function(e){var n=this.pipeline.run(this.tokenizerFn(e)),i=new t.Vector,r=[],o=this._fields.reduce(function(t,e){return t+e.boost},0),s=n.some(function(t){return this.tokenStore.has(t)},this);if(!s)return[];n.forEach(function(e,n,s){var a=1/s.length*this._fields.length*o,h=this,u=this.tokenStore.expand(e).reduce(function(n,r){var o=h.corpusTokens.indexOf(r),s=h.idf(r),u=1,l=new t.SortedSet;if(r!==e){var 
c=Math.max(3,r.length-e.length);u=1/Math.log(c)}o>-1&&i.insert(o,a*s*u);for(var f=h.tokenStore.get(r),d=Object.keys(f),p=d.length,v=0;p>v;v++)l.add(f[d[v]].ref);return n.union(l)},new t.SortedSet);r.push(u)},this);var a=r.reduce(function(t,e){return t.intersect(e)});return a.map(function(t){return{ref:t,score:i.similarity(this.documentVector(t))}},this).sort(function(t,e){return e.score-t.score})},t.Index.prototype.documentVector=function(e){for(var n=this.documentStore.get(e),i=n.length,r=new t.Vector,o=0;i>o;o++){var s=n.elements[o],a=this.tokenStore.get(s)[e].tf,h=this.idf(s);r.insert(this.corpusTokens.indexOf(s),a*h)}return r},t.Index.prototype.toJSON=function(){return{version:t.version,fields:this._fields,ref:this._ref,tokenizer:this.tokenizerFn.label,documentStore:this.documentStore.toJSON(),tokenStore:this.tokenStore.toJSON(),corpusTokens:this.corpusTokens.toJSON(),pipeline:this.pipeline.toJSON()}},t.Index.prototype.use=function(t){var e=Array.prototype.slice.call(arguments,1);e.unshift(this),t.apply(this,e)},t.Store=function(){this.store={},this.length=0},t.Store.load=function(e){var n=new this;return n.length=e.length,n.store=Object.keys(e.store).reduce(function(n,i){return n[i]=t.SortedSet.load(e.store[i]),n},{}),n},t.Store.prototype.set=function(t,e){this.has(t)||this.length++,this.store[t]=e},t.Store.prototype.get=function(t){return this.store[t]},t.Store.prototype.has=function(t){return t in this.store},t.Store.prototype.remove=function(t){this.has(t)&&(delete this.store[t],this.length--)},t.Store.prototype.toJSON=function(){return{store:this.store,length:this.length}},t.stemmer=function(){var 
t={ational:"ate",tional:"tion",enci:"ence",anci:"ance",izer:"ize",bli:"ble",alli:"al",entli:"ent",eli:"e",ousli:"ous",ization:"ize",ation:"ate",ator:"ate",alism:"al",iveness:"ive",fulness:"ful",ousness:"ous",aliti:"al",iviti:"ive",biliti:"ble",logi:"log"},e={icate:"ic",ative:"",alize:"al",iciti:"ic",ical:"ic",ful:"",ness:""},n="[^aeiou]",i="[aeiouy]",r=n+"[^aeiouy]*",o=i+"[aeiou]*",s="^("+r+")?"+o+r,a="^("+r+")?"+o+r+"("+o+")?$",h="^("+r+")?"+o+r+o+r,u="^("+r+")?"+i,l=new RegExp(s),c=new RegExp(h),f=new RegExp(a),d=new RegExp(u),p=/^(.+?)(ss|i)es$/,v=/^(.+?)([^s])s$/,g=/^(.+?)eed$/,m=/^(.+?)(ed|ing)$/,y=/.$/,S=/(at|bl|iz)$/,w=new RegExp("([^aeiouylsz])\\1$"),k=new RegExp("^"+r+i+"[^aeiouwxy]$"),x=/^(.+?[^aeiou])y$/,b=/^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/,E=/^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/,F=/^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/,_=/^(.+?)(s|t)(ion)$/,z=/^(.+?)e$/,O=/ll$/,P=new RegExp("^"+r+i+"[^aeiouwxy]$"),T=function(n){var i,r,o,s,a,h,u;if(n.length<3)return n;if(o=n.substr(0,1),"y"==o&&(n=o.toUpperCase()+n.substr(1)),s=p,a=v,s.test(n)?n=n.replace(s,"$1$2"):a.test(n)&&(n=n.replace(a,"$1$2")),s=g,a=m,s.test(n)){var T=s.exec(n);s=l,s.test(T[1])&&(s=y,n=n.replace(s,""))}else if(a.test(n)){var T=a.exec(n);i=T[1],a=d,a.test(i)&&(n=i,a=S,h=w,u=k,a.test(n)?n+="e":h.test(n)?(s=y,n=n.replace(s,"")):u.test(n)&&(n+="e"))}if(s=x,s.test(n)){var T=s.exec(n);i=T[1],n=i+"i"}if(s=b,s.test(n)){var T=s.exec(n);i=T[1],r=T[2],s=l,s.test(i)&&(n=i+t[r])}if(s=E,s.test(n)){var T=s.exec(n);i=T[1],r=T[2],s=l,s.test(i)&&(n=i+e[r])}if(s=F,a=_,s.test(n)){var T=s.exec(n);i=T[1],s=c,s.test(i)&&(n=i)}else if(a.test(n)){var T=a.exec(n);i=T[1]+T[2],a=c,a.test(i)&&(n=i)}if(s=z,s.test(n)){var T=s.exec(n);i=T[1],s=c,a=f,h=P,(s.test(i)||a.test(i)&&!h.test(i))&&(n=i)}return 
s=O,a=c,s.test(n)&&a.test(n)&&(s=y,n=n.replace(s,"")),"y"==o&&(n=o.toLowerCase()+n.substr(1)),n};return T}(),t.Pipeline.registerFunction(t.stemmer,"stemmer"),t.generateStopWordFilter=function(t){var e=t.reduce(function(t,e){return t[e]=e,t},{});return function(t){return t&&e[t]!==t?t:void 0}},t.stopWordFilter=t.generateStopWordFilter(["a","able","about","across","after","all","almost","also","am","among","an","and","any","are","as","at","be","because","been","but","by","can","cannot","could","dear","did","do","does","either","else","ever","every","for","from","get","got","had","has","have","he","her","hers","him","his","how","however","i","if","in","into","is","it","its","just","least","let","like","likely","may","me","might","most","must","my","neither","no","nor","not","of","off","often","on","only","or","other","our","own","rather","said","say","says","she","should","since","so","some","than","that","the","their","them","then","there","these","they","this","tis","to","too","twas","us","wants","was","we","were","what","when","where","which","while","who","whom","why","will","with","would","yet","you","your"]),t.Pipeline.registerFunction(t.stopWordFilter,"stopWordFilter"),t.trimmer=function(t){return t.replace(/^\W+/,"").replace(/\W+$/,"")},t.Pipeline.registerFunction(t.trimmer,"trimmer"),t.TokenStore=function(){this.root={docs:{}},this.length=0},t.TokenStore.load=function(t){var e=new this;return e.root=t.root,e.length=t.length,e},t.TokenStore.prototype.add=function(t,e,n){var n=n||this.root,i=t.charAt(0),r=t.slice(1);return i in n||(n[i]={docs:{}}),0===r.length?(n[i].docs[e.ref]=e,void(this.length+=1)):this.add(r,e,n[i])},t.TokenStore.prototype.has=function(t){if(!t)return!1;for(var e=this.root,n=0;nr;r++){for(var o=t[r],s=0;i>s&&(o=this._stack[s](o,r,t),void 0!==o&&""!==o);s++);void 0!==o&&""!==o&&e.push(o)}return e},t.Pipeline.prototype.reset=function(){this._stack=[]},t.Pipeline.prototype.toJSON=function(){return this._stack.map(function(e){return 
t.Pipeline.warnIfFunctionNotRegistered(e),e.label})},t.Vector=function(){this._magnitude=null,this.list=void 0,this.length=0},t.Vector.Node=function(t,e,n){this.idx=t,this.val=e,this.next=n},t.Vector.prototype.insert=function(e,n){this._magnitude=void 0;var i=this.list;if(!i)return this.list=new t.Vector.Node(e,n,i),this.length++;if(en.idx?n=n.next:(i+=e.val*n.val,e=e.next,n=n.next);return i},t.Vector.prototype.similarity=function(t){return this.dot(t)/(this.magnitude()*t.magnitude())},t.SortedSet=function(){this.length=0,this.elements=[]},t.SortedSet.load=function(t){var e=new this;return e.elements=t,e.length=t.length,e},t.SortedSet.prototype.add=function(){var t,e;for(t=0;t1;){if(o===t)return r;t>o&&(e=r),o>t&&(n=r),i=n-e,r=e+Math.floor(i/2),o=this.elements[r]}return o===t?r:-1},t.SortedSet.prototype.locationFor=function(t){for(var e=0,n=this.elements.length,i=n-e,r=e+Math.floor(i/2),o=this.elements[r];i>1;)t>o&&(e=r),o>t&&(n=r),i=n-e,r=e+Math.floor(i/2),o=this.elements[r];return o>t?r:t>o?r+1:void 0},t.SortedSet.prototype.intersect=function(e){for(var n=new t.SortedSet,i=0,r=0,o=this.length,s=e.length,a=this.elements,h=e.elements;;){if(i>o-1||r>s-1)break;a[i]!==h[r]?a[i]h[r]&&r++:(n.add(a[i]),i++,r++)}return n},t.SortedSet.prototype.clone=function(){var e=new t.SortedSet;return e.elements=this.toArray(),e.length=e.elements.length,e},t.SortedSet.prototype.union=function(t){var e,n,i;this.length>=t.length?(e=this,n=t):(e=t,n=this),i=e.clone();for(var r=0,o=n.toArray();rp;p++)c[p]===a&&d++;h+=d/f*l.boost}}this.tokenStore.add(a,{ref:o,tf:h})}n&&this.eventEmitter.emit("add",e,this)},t.Index.prototype.remove=function(t,e){var n=t[this._ref],e=void 0===e?!0:e;if(this.documentStore.has(n)){var i=this.documentStore.get(n);this.documentStore.remove(n),i.forEach(function(t){this.tokenStore.remove(t,n)},this),e&&this.eventEmitter.emit("remove",t,this)}},t.Index.prototype.update=function(t,e){var e=void 
0===e?!0:e;this.remove(t,!1),this.add(t,!1),e&&this.eventEmitter.emit("update",t,this)},t.Index.prototype.idf=function(t){var e="@"+t;if(Object.prototype.hasOwnProperty.call(this._idfCache,e))return this._idfCache[e];var n=this.tokenStore.count(t),i=1;return n>0&&(i=1+Math.log(this.documentStore.length/n)),this._idfCache[e]=i},t.Index.prototype.search=function(e){var n=this.pipeline.run(this.tokenizerFn(e)),i=new t.Vector,r=[],o=this._fields.reduce(function(t,e){return t+e.boost},0),s=n.some(function(t){return this.tokenStore.has(t)},this);if(!s)return[];n.forEach(function(e,n,s){var a=1/s.length*this._fields.length*o,h=this,u=this.tokenStore.expand(e).reduce(function(n,r){var o=h.corpusTokens.indexOf(r),s=h.idf(r),u=1,l=new t.SortedSet;if(r!==e){var c=Math.max(3,r.length-e.length);u=1/Math.log(c)}o>-1&&i.insert(o,a*s*u);for(var f=h.tokenStore.get(r),d=Object.keys(f),p=d.length,v=0;p>v;v++)l.add(f[d[v]].ref);return n.union(l)},new t.SortedSet);r.push(u)},this);var a=r.reduce(function(t,e){return t.intersect(e)});return a.map(function(t){return{ref:t,score:i.similarity(this.documentVector(t))}},this).sort(function(t,e){return e.score-t.score})},t.Index.prototype.documentVector=function(e){for(var n=this.documentStore.get(e),i=n.length,r=new t.Vector,o=0;i>o;o++){var s=n.elements[o],a=this.tokenStore.get(s)[e].tf,h=this.idf(s);r.insert(this.corpusTokens.indexOf(s),a*h)}return r},t.Index.prototype.toJSON=function(){return{version:t.version,fields:this._fields,ref:this._ref,tokenizer:this.tokenizerFn.label,documentStore:this.documentStore.toJSON(),tokenStore:this.tokenStore.toJSON(),corpusTokens:this.corpusTokens.toJSON(),pipeline:this.pipeline.toJSON()}},t.Index.prototype.use=function(t){var e=Array.prototype.slice.call(arguments,1);e.unshift(this),t.apply(this,e)},t.Store=function(){this.store={},this.length=0},t.Store.load=function(e){var n=new this;return n.length=e.length,n.store=Object.keys(e.store).reduce(function(n,i){return 
n[i]=t.SortedSet.load(e.store[i]),n},{}),n},t.Store.prototype.set=function(t,e){this.has(t)||this.length++,this.store[t]=e},t.Store.prototype.get=function(t){return this.store[t]},t.Store.prototype.has=function(t){return t in this.store},t.Store.prototype.remove=function(t){this.has(t)&&(delete this.store[t],this.length--)},t.Store.prototype.toJSON=function(){return{store:this.store,length:this.length}},t.stemmer=function(){var t={ational:"ate",tional:"tion",enci:"ence",anci:"ance",izer:"ize",bli:"ble",alli:"al",entli:"ent",eli:"e",ousli:"ous",ization:"ize",ation:"ate",ator:"ate",alism:"al",iveness:"ive",fulness:"ful",ousness:"ous",aliti:"al",iviti:"ive",biliti:"ble",logi:"log"},e={icate:"ic",ative:"",alize:"al",iciti:"ic",ical:"ic",ful:"",ness:""},n="[^aeiou]",i="[aeiouy]",r=n+"[^aeiouy]*",o=i+"[aeiou]*",s="^("+r+")?"+o+r,a="^("+r+")?"+o+r+"("+o+")?$",h="^("+r+")?"+o+r+o+r,u="^("+r+")?"+i,l=new RegExp(s),c=new RegExp(h),f=new RegExp(a),d=new RegExp(u),p=/^(.+?)(ss|i)es$/,v=/^(.+?)([^s])s$/,g=/^(.+?)eed$/,m=/^(.+?)(ed|ing)$/,y=/.$/,S=/(at|bl|iz)$/,w=new RegExp("([^aeiouylsz])\\1$"),k=new RegExp("^"+r+i+"[^aeiouwxy]$"),x=/^(.+?[^aeiou])y$/,b=/^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/,E=/^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/,F=/^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/,_=/^(.+?)(s|t)(ion)$/,z=/^(.+?)e$/,O=/ll$/,P=new RegExp("^"+r+i+"[^aeiouwxy]$"),T=function(n){var i,r,o,s,a,h,u;if(n.length<3)return n;if(o=n.substr(0,1),"y"==o&&(n=o.toUpperCase()+n.substr(1)),s=p,a=v,s.test(n)?n=n.replace(s,"$1$2"):a.test(n)&&(n=n.replace(a,"$1$2")),s=g,a=m,s.test(n)){var T=s.exec(n);s=l,s.test(T[1])&&(s=y,n=n.replace(s,""))}else if(a.test(n)){var T=a.exec(n);i=T[1],a=d,a.test(i)&&(n=i,a=S,h=w,u=k,a.test(n)?n+="e":h.test(n)?(s=y,n=n.replace(s,"")):u.test(n)&&(n+="e"))}if(s=x,s.test(n)){var T=s.exec(n);i=T[1],n=i+"i"}if(s=b,s.test(n)){var 
T=s.exec(n);i=T[1],r=T[2],s=l,s.test(i)&&(n=i+t[r])}if(s=E,s.test(n)){var T=s.exec(n);i=T[1],r=T[2],s=l,s.test(i)&&(n=i+e[r])}if(s=F,a=_,s.test(n)){var T=s.exec(n);i=T[1],s=c,s.test(i)&&(n=i)}else if(a.test(n)){var T=a.exec(n);i=T[1]+T[2],a=c,a.test(i)&&(n=i)}if(s=z,s.test(n)){var T=s.exec(n);i=T[1],s=c,a=f,h=P,(s.test(i)||a.test(i)&&!h.test(i))&&(n=i)}return s=O,a=c,s.test(n)&&a.test(n)&&(s=y,n=n.replace(s,"")),"y"==o&&(n=o.toLowerCase()+n.substr(1)),n};return T}(),t.Pipeline.registerFunction(t.stemmer,"stemmer"),t.generateStopWordFilter=function(t){var e=t.reduce(function(t,e){return t[e]=e,t},{});return function(t){return t&&e[t]!==t?t:void 0}},t.stopWordFilter=t.generateStopWordFilter(["a","able","about","across","after","all","almost","also","am","among","an","and","any","are","as","at","be","because","been","but","by","can","cannot","could","dear","did","do","does","either","else","ever","every","for","from","get","got","had","has","have","he","her","hers","him","his","how","however","i","if","in","into","is","it","its","just","least","let","like","likely","may","me","might","most","must","my","neither","no","nor","not","of","off","often","on","only","or","other","our","own","rather","said","say","says","she","should","since","so","some","than","that","the","their","them","then","there","these","they","this","tis","to","too","twas","us","wants","was","we","were","what","when","where","which","while","who","whom","why","will","with","would","yet","you","your"]),t.Pipeline.registerFunction(t.stopWordFilter,"stopWordFilter"),t.trimmer=function(t){return t.replace(/^\W+/,"").replace(/\W+$/,"")},t.Pipeline.registerFunction(t.trimmer,"trimmer"),t.TokenStore=function(){this.root={docs:{}},this.length=0},t.TokenStore.load=function(t){var e=new this;return e.root=t.root,e.length=t.length,e},t.TokenStore.prototype.add=function(t,e,n){var n=n||this.root,i=t.charAt(0),r=t.slice(1);return i in 
n||(n[i]={docs:{}}),0===r.length?(n[i].docs[e.ref]=e,void(this.length+=1)):this.add(r,e,n[i])},t.TokenStore.prototype.has=function(t){if(!t)return!1;for(var e=this.root,n=0;n What does this module do? @@ -27,5 +27,5 @@ For dicom data, we use [pydicom](https://www.github.com/pydicom/pydicom) and for > Where do I go from here? -If you are new to deid or pydicom, we recommend you start with +If you are new to deid or pydicom, we recommend you start with the [getting started]({{ site.baseurl }}/getting-started/) pages. diff --git a/examples/README.md b/examples/README.md index e31a8ba7..75720099 100644 --- a/examples/README.md +++ b/examples/README.md @@ -3,6 +3,6 @@ - [Example deid spec files](deid): are found in the folder [deid](deid) - [Example dicom scripts](dicom): are found in the folder [dicom](dicom) -For detailed walk throughs, please reference our [docs](https://pydicom.github.io/deid). -For questions, issues, suggestions, or if you want to help out, +For detailed walk throughs, please reference our [docs](https://pydicom.github.io/deid). +For questions, issues, suggestions, or if you want to help out, please [open an issue](https://www.github.com/pydicom/deid). diff --git a/examples/dicom/README.md b/examples/dicom/README.md index a01f01f4..b453a96a 100644 --- a/examples/dicom/README.md +++ b/examples/dicom/README.md @@ -1,5 +1,5 @@ # Examples -This folder contains examples for interacting with deid! For the basic -dicom example, see the [recipes](recipes) folder. For tutorials with +This folder contains examples for interacting with deid! For the basic +dicom example, see the [recipes](recipes) folder. For tutorials with other examples, see [https://pydicom.github.io/deid/examples](https://pydicom.github.io/deid/examples). 
diff --git a/examples/dicom/dicom-extract/README.md b/examples/dicom/dicom-extract/README.md index aae8649d..f2e4a4ab 100644 --- a/examples/dicom/dicom-extract/README.md +++ b/examples/dicom/dicom-extract/README.md @@ -1,4 +1,4 @@ # Extraction from Dicom Headers -This is a user contribution that shows how to generate a csv file with +This is a user contribution that shows how to generate a csv file with dicom metadata. The example is provided in [create-dicom-csv.py](create-dicom-csv.py). diff --git a/examples/dicom/dicom-extract/create-dicom-csv.py b/examples/dicom/dicom-extract/create-dicom-csv.py index 4f155db1..26fc37e4 100644 --- a/examples/dicom/dicom-extract/create-dicom-csv.py +++ b/examples/dicom/dicom-extract/create-dicom-csv.py @@ -1,10 +1,11 @@ -from walkdir import filtered_walk, file_paths -import pydicom +import csv import os import platform -import csv -from collections.abc import Sequence from collections import OrderedDict +from collections.abc import Sequence + +import pydicom +from walkdir import file_paths, filtered_walk def load_tags_in_files(tag_file_path): diff --git a/examples/dicom/header-manipulation/file-meta/example.py b/examples/dicom/header-manipulation/file-meta/example.py index 18a7d567..b0d44ea1 100755 --- a/examples/dicom/header-manipulation/file-meta/example.py +++ b/examples/dicom/header-manipulation/file-meta/example.py @@ -1,8 +1,8 @@ #!/usr/bin/env python -from deid.dicom import get_identifiers, replace_identifiers, get_files from deid.config import DeidRecipe from deid.data import get_dataset +from deid.dicom import get_files, get_identifiers, replace_identifiers # This is supported for deid.dicom version 0.1.34 diff --git a/examples/dicom/header-manipulation/func-replacement.py b/examples/dicom/header-manipulation/func-replacement.py index 3791a9ca..69ae7a25 100644 --- a/examples/dicom/header-manipulation/func-replacement.py +++ b/examples/dicom/header-manipulation/func-replacement.py @@ -1,7 +1,9 @@ #!/usr/bin/env python3 
-from deid.dicom import get_files, replace_identifiers +# Create the DeidRecipe Instance from deid.dicom +from deid.config import DeidRecipe from deid.data import get_dataset +from deid.dicom import get_files, get_identifiers, replace_identifiers # This is an example of replacing fields in dicom headers, # but via a function instead of a preset identifier. @@ -11,9 +13,6 @@ dicom_files = list(get_files(base)) # todo : consider using generator functionality -# This is the function to get identifiers -from deid.dicom import get_identifiers - items = get_identifiers(dicom_files) # ** @@ -41,9 +40,6 @@ # output from the generate_uid function, which is expected in the item dict ################################## -# Create the DeidRecipe Instance from deid.dicom -from deid.config import DeidRecipe - recipe = DeidRecipe("deid.dicom") # To see an entire (raw in a dictionary) recipe just look at @@ -56,17 +52,16 @@ # What actions do we want to do on the header? recipe.get_actions() -""" -[{'action': 'REPLACE', - 'field': 'StudyInstanceUID', - 'value': 'func:generate_uid'}, - {'action': 'REPLACE', - 'field': 'SeriesInstanceUID', - 'value': 'func:generate_uid'}, - {'action': 'REPLACE', - 'field': 'FrameOfReferenceUID', - 'value': 'func:generate_uid'}] -""" + +# [{'action': 'REPLACE', +# 'field': 'StudyInstanceUID', +# 'value': 'func:generate_uid'}, +# {'action': 'REPLACE', +# 'field': 'SeriesInstanceUID', +# 'value': 'func:generate_uid'}, +# {'action': 'REPLACE', +# 'field': 'FrameOfReferenceUID', +# 'value': 'func:generate_uid'}] # We can filter to an action type (not useful here, we only have one type) recipe.get_actions(action="REPLACE") @@ -74,11 +69,10 @@ # or we can filter to a field recipe.get_actions(field="FrameOfReferenceUID") -""" -[{'action': 'REPLACE', - 'field': 'FrameOfReferenceUID', - 'value': 'func:generate_uid'}] -""" +# [{'action': 'REPLACE', +# 'field': 'FrameOfReferenceUID', +# 'value': 'func:generate_uid'}] + # and logically, both (not useful here) 
recipe.get_actions(field="PatientID", action="REMOVE") diff --git a/examples/dicom/header-manipulation/func-sequence-replace/example.py b/examples/dicom/header-manipulation/func-sequence-replace/example.py index 9f4a2eff..8199b994 100755 --- a/examples/dicom/header-manipulation/func-sequence-replace/example.py +++ b/examples/dicom/header-manipulation/func-sequence-replace/example.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -from deid.dicom import get_identifiers, replace_identifiers from deid.config import DeidRecipe +from deid.dicom import get_identifiers, replace_identifiers # This is supported for deid.dicom version 0.1.34 diff --git a/examples/dicom/pixels/run-cleaner-client.py b/examples/dicom/pixels/run-cleaner-client.py index 4683ee9a..bddba32f 100644 --- a/examples/dicom/pixels/run-cleaner-client.py +++ b/examples/dicom/pixels/run-cleaner-client.py @@ -1,6 +1,9 @@ #!/usr/bin/env python3 -from deid.dicom import DicomCleaner +from deid.data import get_dataset + +# This will get a set of example cookie dicoms +from deid.dicom import DicomCleaner, get_files # This is a complete example of using the cleaning client to inspect # and clean pixels @@ -11,9 +14,6 @@ # 1. 
Get List of Files ######################################### -# This will get a set of example cookie dicoms -from deid.dicom import get_files -from deid.data import get_dataset base = get_dataset("dicom-cookies") dicom_files = list(get_files(base)) # todo : consider using generator functionality diff --git a/examples/dicom/pixels/run-inspect-pixels.py b/examples/dicom/pixels/run-inspect-pixels.py index c211de80..ba0eb33d 100644 --- a/examples/dicom/pixels/run-inspect-pixels.py +++ b/examples/dicom/pixels/run-inspect-pixels.py @@ -5,9 +5,10 @@ # https://pydicom.github.io/deid +from deid.data import get_dataset + # This will get a set of example cookie dicoms from deid.dicom import get_files, has_burned_pixels -from deid.data import get_dataset from deid.logger import bot bot.level = 3 diff --git a/examples/dicom/recipe/deid-dicom-example.py b/examples/dicom/recipe/deid-dicom-example.py index ee7162d1..2c070467 100644 --- a/examples/dicom/recipe/deid-dicom-example.py +++ b/examples/dicom/recipe/deid-dicom-example.py @@ -1,10 +1,16 @@ #!/usr/bin/env python3 -from deid.dicom import get_files, replace_identifiers -from deid.utils import get_installdir -from deid.data import get_dataset import os +# We can load in a cleaned file to see what was done +from pydicom import read_file + +# Create a DeidRecipe +from deid.config import DeidRecipe +from deid.data import get_dataset +from deid.dicom import get_files, get_identifiers, replace_identifiers +from deid.utils import get_installdir + # This is a complete example of doing de-identification. 
For details, see our docs # https://pydicom.github.io/deid @@ -14,9 +20,6 @@ dicom_files = list(get_files(base)) # todo : consider using generator functionality -# This is the function to get identifiers -from deid.dicom import get_identifiers - ids = get_identifiers(dicom_files) # ** @@ -40,9 +43,6 @@ # ################################## -# Create a DeidRecipe -from deid.config import DeidRecipe - recipe = DeidRecipe() # Since we didn't load a custom deid recipe text file, we get a default @@ -196,9 +196,6 @@ ) -# We can load in a cleaned file to see what was done -from pydicom import read_file - test_file = read_file(cleaned_files[0]) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..831adc62 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,7 @@ +[tool.black] +profile = "black" +exclude = ["^env/"] + +[tool.isort] +profile = "black" # needed for black/isort compatibility +skip = [] diff --git a/setup.cfg b/setup.cfg index 8b143afe..0b1421f1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,3 +1,16 @@ +[flake8] +exclude = benchmarks docs +max-line-length = 100 +ignore = E1 E2 E5 W5 +per-file-ignores = + deid/dicom/__init__.py:F401 + deid/utils/__init__.py:F401 + deid/config/__init__.py:F401 + deid/main/__init__.py:F401 + deid/logger/__init__.py:F401 + deid/dicom/actions/__init__.py:F401 + deid/dicom/pixels/__init__.py:F401 + [metadata] description-file = README.md diff --git a/setup.py b/setup.py index 0f2933f2..e6983635 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,7 @@ -from setuptools import setup, find_packages import os +from setuptools import find_packages, setup + def get_lookup(): """ @@ -94,7 +95,7 @@ def get_requirements(lookup=None): long_description_content_type="text/markdown", keywords=KEYWORDS, install_requires=INSTALL_REQUIRES, - python_requires='>=3.7', + python_requires=">=3.7", classifiers=[ "Development Status :: 5 - Production/Stable", "Environment :: Console",