From 5277cc1b30fc73a4ab9b345869272d642005fca4 Mon Sep 17 00:00:00 2001 From: Rajan Mudambi <11376379+rmudambi@users.noreply.github.com> Date: Fri, 31 Mar 2023 18:26:44 -0700 Subject: [PATCH 1/2] Bugfix/incorrect selection (#27) * fix incorrect selection on employer and mailing states --- src/pseudopeople/noise_functions.py | 10 +++++++--- tests/unit/test_column_noise.py | 1 - tests/unit/test_noise_form.py | 1 - 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/pseudopeople/noise_functions.py b/src/pseudopeople/noise_functions.py index b4b8110a..ba5ba25f 100644 --- a/src/pseudopeople/noise_functions.py +++ b/src/pseudopeople/noise_functions.py @@ -59,17 +59,21 @@ def generate_incorrect_selections( :returns: pd.Series where data has been noised with other values from a list of possibilities """ - col = column.name + selection_type = { + "employer_state": "state", + "mailing_address_state": "state", + }.get(str(column.name), column.name) + selection_options = pd.read_csv(paths.INCORRECT_SELECT_NOISE_OPTIONS_DATA) # Get possible noise values # todo: Update with exclusive resampling when vectorized_choice is improved - options = selection_options.loc[selection_options[col].notna(), col] + options = selection_options.loc[selection_options[selection_type].notna(), selection_type] new_values = vectorized_choice( options=options, n_to_choose=len(column), randomness_stream=randomness_stream, - additional_key=f"{additional_key}_{col}_incorrect_select_choice", + additional_key=f"{additional_key}_{column.name}_incorrect_select_choice", ).to_numpy() return pd.Series(new_values, index=column.index) diff --git a/tests/unit/test_column_noise.py b/tests/unit/test_column_noise.py index 9ee09288..c8c0802d 100644 --- a/tests/unit/test_column_noise.py +++ b/tests/unit/test_column_noise.py @@ -55,7 +55,6 @@ def default_configuration(): def test_generate_missing_data(dummy_dataset): - config = get_configuration() config.update( { diff --git a/tests/unit/test_noise_form.py b/tests/unit/test_noise_form.py index 1cfb9388..ee099f35 100644 --- a/tests/unit/test_noise_form.py +++ b/tests/unit/test_noise_form.py @@ -176,7 +176,6 @@ def test_correct_forms_are_used(func, form, mocker): def test_two_noise_functions_are_independent(mocker): - # Make simple config tree to test 2 noise functions work together config_tree = ConfigTree( { From 7d2741c44b5157c209c487aa0a1d0eea267e69c0 Mon Sep 17 00:00:00 2001 From: Rajan Mudambi <11376379+rmudambi@users.noreply.github.com> Date: Fri, 31 Mar 2023 18:30:25 -0700 Subject: [PATCH 2/2] generate hotfix release candidate v0.2.1 --- CHANGELOG.rst | 4 ++++ src/pseudopeople/__about__.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 806058a5..c3b38b91 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,3 +1,7 @@ +**0.2.1 - 03/31/23** + + - Fix bug preventing generation of W2/1099 forms + **0.2.0 - 03/31/23** - Implemented W2/1099 forms diff --git a/src/pseudopeople/__about__.py b/src/pseudopeople/__about__.py index c8ae9993..8167c4ad 100644 --- a/src/pseudopeople/__about__.py +++ b/src/pseudopeople/__about__.py @@ -13,7 +13,7 @@ __summary__ = "pseudopeople is package which adds noise to simulated census-scale data using standard scientific Python tools." __uri__ = "https://github.com/ihmeuw/pseudopeople" -__version__ = "0.2.0" +__version__ = "0.2.1" __author__ = "The pseudopeople developers" __email__ = "vivarium.dev@gmail.com"