From 2d41acb86395273d4324b9cc6ca50ce69a506519 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Sat, 4 May 2019 13:47:29 -0400 Subject: [PATCH 1/3] Add failing tests for coded values handling --- census/tests/test_census.py | 55 ++++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/census/tests/test_census.py b/census/tests/test_census.py index ed09652..228d2ed 100644 --- a/census/tests/test_census.py +++ b/census/tests/test_census.py @@ -3,12 +3,13 @@ import os import time import unittest +from unittest import mock from contextlib import closing import requests from census.core import ( - Census, UnsupportedYearException) + Census, UnsupportedYearException, CensusException) KEY = os.environ.get('CENSUS_KEY', '') @@ -121,6 +122,58 @@ def test_la_canada_2015(self): ) +@mock.patch('census.core.Client.get', autospec=True, return_value=[{'var': -666666666.0}]) +class TestCodedValues(CensusTestCase): + """ + Unit tests for handling coded values, like -66666666 and -999999999. + """ + def test_handle_666666666_as_null(self, mock_get): + """ + Test casting -666666666 values to null. + """ + return_val = self._client.acs5.get('NAME', + {'for': 'state:*'}, + year=2016, + handle_nulls=True) + self.assertEqual(return_val, [{'var': None}]) + + def test_handle_666666666_as_error(self, mock_get): + """ + Test raising an error for -666666666 values. + """ + with self.assertRaises(CensusException): + self._client.acs5.get('NAME', + {'for': 'state:*'}, + year=2016, + handle_nulls=False) + + def test_handle_666666666_default(self, mock_get): + """ + Test the default behavior of handling -666666666 values, which is to + cast them to null. + """ + return_val = self._client.acs5.get('NAME', {'for': 'state:*'}, year=2016) + self.assertEqual(return_val, [{'var': None}]) + + +class TestCodedValuesIntegration(CensusTestCase): + """ + Integration tests for handling coded values, like -666666666 and -999999999. 
+ """ + def test_handle_666666666(self): + """ + Test the default behavior of handling -666666666 values, which is to + cast them to null. + """ + # This call should return a value of -666666666 + return_val = self._client.acs5.state_county_tract('B19081_001E', + 42, + 101, + '989100', + year=2016) + self.assertEqual(return_val[0]['B19081_001E'], None) + + class TestEndpoints(CensusTestCase): def check_endpoints(self, client_name, tests, **kwargs): From 648f7c89e1afcc660bcb98b49787ee74c3d88c21 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Sat, 4 May 2019 15:06:00 -0400 Subject: [PATCH 2/3] Handle coded values by casting to null or raising an error --- census/core.py | 82 +++++++++++++++++++++++++++++++++---- census/tests/test_census.py | 76 +++++++++++++++++++--------------- 2 files changed, 115 insertions(+), 43 deletions(-) diff --git a/census/core.py b/census/core.py index fcd441c..4927266 100644 --- a/census/core.py +++ b/census/core.py @@ -36,7 +36,42 @@ def list_or_str(v): return v return [v] +def cast_nulls(func): + """ + Decorator to format null values in API result casting functions. + """ + def null_wrapper(v, cast_nulls): + # This value indicates that there were too few observations to compute + # an estimate. See: + # https://www.census.gov/data/developers/data-sets/acs-1year/notes-on-acs-estimate-and-annotation-values.html + if str(v) == '-666666666': + if cast_nulls is True: + return None + else: + raise NullValueException('Unhandled coded value: ', str(v)) + else: + return func(v) + return null_wrapper + +@cast_nulls +def to_str(v): + """ + Cast an API result to a string. + """ + return str(v) + +@cast_nulls +def to_float(v): + """ + Cast an API result to a float. + """ + return float(v) + +@cast_nulls def float_or_str(v): + """ + Try casting an API result to a float, and fall back to a string. 
+ """ try: return float(v) except ValueError: @@ -92,6 +127,10 @@ class UnsupportedYearException(CensusException): pass +class NullValueException(Exception): + pass + + class Client(object): endpoint_url = 'https://api.census.gov/data/%s/%s' definitions_url = 'https://api.census.gov/data/%s/%s/variables.json' @@ -158,6 +197,10 @@ def get(self, fields, geo, year=None, **kwargs): @retry_on_transient_error def query(self, fields, geo, year=None, **kwargs): + cast_nulls = kwargs.get('cast_nulls', True) + if cast_nulls not in [True, False]: + raise CensusException('cast_nulls argument must be True or False') + if year is None: year = self.default_year @@ -187,10 +230,31 @@ def query(self, fields, geo, year=None, **kwargs): headers = data.pop(0) types = [self._field_type(header, year) for header in headers] - results = [{header : (cast(item) if item is not None else None) - for header, cast, item - in zip(headers, types, d)} - for d in data] + results = [] + error = False + for d in data: + row = [] + for header, cast, item in zip(headers, types, d): + if item is not None: + try: + result = {header: cast(item, cast_nulls)} + except NullValueException: + # This value needs to raise an error, but we need the + # rest of the row values for context, so flag the + # error and continue the iteration + error = True + result = {header: item} + else: + result = None + row.append(result) + if error: + msg = 'Null estimate code found: ' + str(row) + msg += '\nSee the Census documentation for more information:' + msg += '\nhttps://www.census.gov/data/developers/data-sets/acs-1year/notes-on-acs-estimate-and-annotation-values.html' + raise CensusException(msg) + else: + for result in row: + results.append(result) return results elif resp.status_code == 204: @@ -204,17 +268,17 @@ def _field_type(self, field, year): url = self.definition_url % (year, self.dataset, field) resp = self.session.get(url) - types = {"fips-for" : str, - "fips-in" : str, + types = {"fips-for" : to_str, + 
"fips-in" : to_str, "int" : float_or_str, - "float": float, - "string": str} + "float": to_float, + "string": to_str} if resp.status_code == 200: predicate_type = resp.json().get("predicateType", "string") return types[predicate_type] else: - return str + return to_str @supported_years() def us(self, fields, **kwargs): diff --git a/census/tests/test_census.py b/census/tests/test_census.py index 228d2ed..23228b6 100644 --- a/census/tests/test_census.py +++ b/census/tests/test_census.py @@ -3,7 +3,6 @@ import os import time import unittest -from unittest import mock from contextlib import closing import requests @@ -122,57 +121,66 @@ def test_la_canada_2015(self): ) -@mock.patch('census.core.Client.get', autospec=True, return_value=[{'var': -666666666.0}]) class TestCodedValues(CensusTestCase): """ - Unit tests for handling coded values, like -66666666 and -999999999. + Tests for handling coded values, like -666666666 and -999999999. """ - def test_handle_666666666_as_null(self, mock_get): + def test_handle_666666666(self): """ - Test casting -666666666 values to null. + Test the default behavior of handling -666666666 values, which is to + cast them to null. """ - return_val = self._client.acs5.get('NAME', - {'for': 'state:*'}, - year=2016, - handle_nulls=True) - self.assertEqual(return_val, [{'var': None}]) + # This call should return a value of -666666666 + return_val = self._client.acs5.state_county_tract('B19081_001E', + 42, + 101, + '989100', + year=2016) + self.assertEqual(return_val[0]['B19081_001E'], None) - def test_handle_666666666_as_error(self, mock_get): + def test_handle_666666666_as_error(self): """ Test raising an error for -666666666 values. """ with self.assertRaises(CensusException): - self._client.acs5.get('NAME', - {'for': 'state:*'}, - year=2016, - handle_nulls=False) - - def test_handle_666666666_default(self, mock_get): - """ - Test the default behavior of handling -666666666 values, which is to - cast them to null. 
- """ - return_val = self._client.acs5.get('NAME', {'for': 'state:*'}, year=2016) - self.assertEqual(return_val, [{'var': None}]) - - -class TestCodedValuesIntegration(CensusTestCase): - """ - Integration tests for handling coded values, like -666666666 and -999999999. - """ - def test_handle_666666666(self): + return_val = self._client.acs5.state_county_tract('B19081_001E', + 42, + 101, + '989100', + year=2016, + cast_nulls=False) + + def test_handle_666666666_as_null(self): """ - Test the default behavior of handling -666666666 values, which is to - cast them to null. + Test casting -666666666 values to null. """ - # This call should return a value of -666666666 return_val = self._client.acs5.state_county_tract('B19081_001E', 42, 101, '989100', - year=2016) + year=2016, + cast_nulls=True) self.assertEqual(return_val[0]['B19081_001E'], None) + def test_bad_cast_nulls_argument(self): + """ + Test that an error gets raised for poorly-formatted cast_nulls argument. + """ + with self.assertRaises(CensusException): + return_val = self._client.acs5.state('NAME', + Census.ALL, + cast_nulls='foobar') + + with self.assertRaises(CensusException): + return_val = self._client.acs5.state('NAME', + Census.ALL, + cast_nulls=None) + + with self.assertRaises(CensusException): + return_val = self._client.acs5.state('NAME', + Census.ALL, + cast_nulls=10) + class TestEndpoints(CensusTestCase): From e610ae76d61defd1158bbcabe786825e30f80388 Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 6 May 2019 22:03:27 -0400 Subject: [PATCH 3/3] Fix data formatting for individual API results in .get --- census/core.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/census/core.py b/census/core.py index 4927266..552d6e5 100644 --- a/census/core.py +++ b/census/core.py @@ -233,28 +233,27 @@ def query(self, fields, geo, year=None, **kwargs): results = [] error = False for d in data: - row = [] + result = {} for header, cast, item in zip(headers, types, d): 
if item is not None: try: - result = {header: cast(item, cast_nulls)} + var_value = cast(item, cast_nulls) except NullValueException: # This value needs to raise an error, but we need the - # rest of the row values for context, so flag the + # rest of the result values for context, so flag the # error and continue the iteration error = True - result = {header: item} + var_value = item else: - result = None - row.append(result) + var_value = None + result[header] = var_value if error: - msg = 'Null estimate code found: ' + str(row) + msg = 'Null estimate code found: ' + str(result) msg += '\nSee the Census documentation for more information:' msg += '\nhttps://www.census.gov/data/developers/data-sets/acs-1year/notes-on-acs-estimate-and-annotation-values.html' raise CensusException(msg) else: - for result in row: - results.append(result) + results.append(result) return results elif resp.status_code == 204: