import re

import pytest


# --- cdci_data_analysis/flask_app/app.py -----------------------------------

# Keys whose entries are dropped entirely instead of being logged.
_SENSITIVE_LOG_KEYS = frozenset({'token'})  # Add any other sensitive keys here

# Negated character class: matches every character that is NOT an ASCII
# letter, an ASCII digit or a space. Compiled once because it is applied to
# every value of every dictionary that gets sanitized.
_DISALLOWED_LOG_CHARACTERS = re.compile(r'[^A-Za-z0-9 ]')


def sanitize_dict_before_log(dict_to_sanitize):
    """Return a copy of ``dict_to_sanitize`` that is safe to write to a log.

    Entries whose key is listed in ``_SENSITIVE_LOG_KEYS`` (currently only
    ``'token'``) are left out of the result. Every remaining value is
    converted with ``str()`` and stripped of all characters other than ASCII
    letters, ASCII digits and spaces, which also removes the ``\\n`` / ``\\r``
    characters the previous implementation targeted.

    Args:
        dict_to_sanitize: mapping of arbitrary keys to arbitrary values; it
            is not modified.

    Returns:
        A new ``dict`` with the non-sensitive keys of the input, each mapped
        to its sanitized string value.

    Note:
        Only top-level keys are filtered. A container value is stringified as
        a whole, so a sensitive key nested inside it is not removed.
    """
    return {
        key: _DISALLOWED_LOG_CHARACTERS.sub('', str(value))
        for key, value in dict_to_sanitize.items()
        if key not in _SENSITIVE_LOG_KEYS
    }


# --- tests/test_server_basic.py --------------------------------------------
# In the repository the test module obtains the function with
# ``from cdci_data_analysis.flask_app.app import sanitize_dict_before_log``.

@pytest.mark.fast
def test_sanitize_dict_before_log():
    """Sensitive keys are dropped and disallowed characters are stripped."""
    test_dict = {
        'token': 'mytoken',
        'field': 'myfield\n\r',
        'username': 'myusername',
        'email': 'myemail@example.com'
    }

    expected_dict = {
        'field': 'myfield',
        'username': 'myusername',
        'email': 'myemailexamplecom'
    }

    sanitized_dict = sanitize_dict_before_log(test_dict)
    assert sanitized_dict == expected_dict