diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index a3ac618..33dd9dc 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -1,10 +1,10 @@ -# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples -# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: [main, master] + branches: + - main pull_request: - branches: [main, master] + branches: + - main name: R-CMD-check @@ -18,32 +18,85 @@ jobs: fail-fast: false matrix: config: - - {os: macos-latest, r: 'release'} + # - {os: macOS-latest, r: 'devel'} # Issue with loading 'units' dependency + - {os: macOS-latest, r: 'release'} + - {os: macOS-latest, r: 'oldrel'} + - {os: windows-latest, r: 'devel'} - {os: windows-latest, r: 'release'} - - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - - {os: ubuntu-latest, r: 'release'} - - {os: ubuntu-latest, r: 'oldrel-1'} + - {os: windows-latest, r: 'oldrel'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel'} env: + R_KEEP_PKG_SOURCE: yes + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: ${{ matrix.config.rspm }} GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - R_KEEP_PKG_SOURCE: yes steps: - - uses: actions/checkout@v3 - - - uses: r-lib/actions/setup-pandoc@v2 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.r }} http-user-agent: ${{ matrix.config.http-user-agent }} use-public-rspm: true + + - uses: r-lib/actions/setup-pandoc@v2 - - uses: r-lib/actions/setup-r-dependencies@v2 + - name: "[Custom block] [MacOS] Install spatial libraries" + if: runner.os == 'macOS' + run: brew install udunits gdal proj geos + + - name: "[Custom block] [Linux] Install spatial libraries" + if: runner.os == 'Linux' + run: sudo apt-get install libudunits2-dev libgdal-dev libproj-dev libgeos-dev + + - name: 
Query dependencies + run: | + install.packages('remotes') + saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) + writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") + shell: Rscript {0} + + - name: Cache R packages + if: runner.os != 'Windows' + uses: actions/cache@v3 with: - extra-packages: any::rcmdcheck - needs: check + path: ${{ env.R_LIBS_USER }} + key: ${{ runner.os }}-r-${{ matrix.config.r }}-1-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- + + - name: Install dependencies + run: | + remotes::install_deps(dependencies = TRUE, build = FALSE) + remotes::install_cran("rcmdcheck") + shell: Rscript {0} + + # - name: "[Custom block] [MacOS] Install units with configuration" + # if: runner.os == 'macOS' + # run: | + # install.packages('udunits2', type = 'source', configure.args = '--with-udunits2-lib=/usr/local/udunits2') + # install.packages('units', type = 'source', configure.args = '--with-udunits2-lib=/usr/local/udunits2') + # shell: Rscript {0} + + - name: Session info + run: | + options(width = 100) + pkgs <- installed.packages()[, "Package"] + sessioninfo::session_info(pkgs, include_base = TRUE) + shell: Rscript {0} + + - name: Check + env: + _R_CHECK_CRAN_INCOMING_REMOTE_: false + run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") + shell: Rscript {0} - - uses: r-lib/actions/check-r-package@v2 + - name: Upload check results + if: failure() + uses: actions/upload-artifact@main with: - upload-snapshots: true + name: ${{ runner.os }}-r${{ matrix.config.r }}-results + path: check diff --git a/.github/workflows/rhub.yaml b/.github/workflows/rhub.yaml new file mode 100644 index 0000000..74ec7b0 --- /dev/null +++ b/.github/workflows/rhub.yaml @@ -0,0 +1,95 @@ +# R-hub's generic GitHub Actions workflow file. 
It's canonical location is at +# https://github.com/r-hub/actions/blob/v1/workflows/rhub.yaml +# You can update this file to a newer version using the rhub2 package: +# +# rhub::rhub_setup() +# +# It is unlikely that you need to modify this file manually. + +name: R-hub +run-name: "${{ github.event.inputs.id }}: ${{ github.event.inputs.name || format('Manually run by {0}', github.triggering_actor) }}" + +on: + workflow_dispatch: + inputs: + config: + description: 'A comma separated list of R-hub platforms to use.' + type: string + default: 'linux,windows,macos' + name: + description: 'Run name. You can leave this empty now.' + type: string + id: + description: 'Unique ID. You can leave this empty now.' + type: string + +jobs: + + setup: + runs-on: ubuntu-latest + outputs: + containers: ${{ steps.rhub-setup.outputs.containers }} + platforms: ${{ steps.rhub-setup.outputs.platforms }} + + steps: + # NO NEED TO CHECKOUT HERE + - uses: r-hub/actions/setup@v1 + with: + config: ${{ github.event.inputs.config }} + id: rhub-setup + + linux-containers: + needs: setup + if: ${{ needs.setup.outputs.containers != '[]' }} + runs-on: ubuntu-latest + name: ${{ matrix.config.label }} + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.setup.outputs.containers) }} + container: + image: ${{ matrix.config.container }} + + steps: + - uses: r-hub/actions/checkout@v1 + - uses: r-hub/actions/platform-info@v1 + with: + token: ${{ secrets.RHUB_TOKEN }} + job-config: ${{ matrix.config.job-config }} + - uses: r-hub/actions/setup-deps@v1 + with: + token: ${{ secrets.RHUB_TOKEN }} + job-config: ${{ matrix.config.job-config }} + - uses: r-hub/actions/run-check@v1 + with: + token: ${{ secrets.RHUB_TOKEN }} + job-config: ${{ matrix.config.job-config }} + + other-platforms: + needs: setup + if: ${{ needs.setup.outputs.platforms != '[]' }} + runs-on: ${{ matrix.config.os }} + name: ${{ matrix.config.label }} + strategy: + fail-fast: false + matrix: + config: ${{ 
fromJson(needs.setup.outputs.platforms) }} + + steps: + - uses: r-hub/actions/checkout@v1 + - uses: r-hub/actions/setup-r@v1 + with: + job-config: ${{ matrix.config.job-config }} + token: ${{ secrets.RHUB_TOKEN }} + - uses: r-hub/actions/platform-info@v1 + with: + token: ${{ secrets.RHUB_TOKEN }} + job-config: ${{ matrix.config.job-config }} + - uses: r-hub/actions/setup-deps@v1 + with: + job-config: ${{ matrix.config.job-config }} + token: ${{ secrets.RHUB_TOKEN }} + - uses: r-hub/actions/run-check@v1 + with: + job-config: ${{ matrix.config.job-config }} + token: ${{ secrets.RHUB_TOKEN }} diff --git a/DESCRIPTION b/DESCRIPTION index 6e84f7b..d334694 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: ndi Title: Neighborhood Deprivation Indices -Version: 0.1.4.9001 -Date: 2023-01-18 +Version: 0.1.6.9002 +Date: 2024-08-22 Authors@R: c(person(given = "Ian D.", family = "Buller", @@ -21,28 +21,43 @@ Description: Computes various metrics of socio-economic deprivation and disparit and (2) based on Andrews et al. (2020) and Slotman et al. (2022) who use variables chosen by Roux and Mair (2010) - . Both are a decomposition - of multiple demographic characteristics from the U.S. Census Bureau - American Community Survey 5-year estimates (ACS-5; 2006-2010 onward). Using data - from the ACS-5 (2005-2009 onward), the package can also (1) compute the spatial - Racial Isolation Index (RI) based on Anthopolos et al. (2011) - , (2) compute the spatial Educational Isolation - Index (EI) based on Bravo et al. (2021) , - (3) compute the aspatial Index of Concentration at the Extremes (ICE) based on - Feldman et al. (2015) and Krieger et al. - (2016) , (4) compute the aspatial racial/ethnic - Dissimilarity Index based on Duncan & Duncan (1955) , (5) - compute the aspatial Atkinson Index based on Atkinson (1970) - , and (6) retrieve the aspatial Gini Index - based on Gini (1921) . + . Both are a decomposition of multiple + demographic characteristics from the U.S. 
Census Bureau American Community + Survey 5-year estimates (ACS-5; 2006-2010 onward). Using data from the ACS-5 + (2005-2009 onward), the package can also (1) compute the spatial Racial Isolation + Index (RI) based on Anthopolos et al. (2011) , + (2) compute the spatial Educational Isolation Index (EI) based on Bravo et al. + (2021) , (3) compute the aspatial Index of + Concentration at the Extremes (ICE) based on Feldman et al. (2015) + and Krieger et al. (2016) + , (4) compute the aspatial racial/ethnic + Dissimilarity Index (D) based on Duncan & Duncan (1955) , (5) + compute the aspatial income or racial/ethnic Atkinson Index (A) based on Atkinson + (1970) , (6) aspatial racial/ethnic Interaction + Index (xPy*) based on Shevky & Williams (1949; ISBN-13:978-0-837-15637-8) and Bell + (1954) , (7) aspatial racial/ethnic Correlation Ratio (V) + based on Bell (1954) and White (1986) , + (8) aspatial racial/ethnic Location Quotient (LQ) based on Merton (1939) + and Sudano et al. (2013) + , (9) aspatial racial/ethnic Local + Exposure and Isolation (LEx/Is) metric based on Bemanian & Beyer (2017) + , (10) aspatial racial/ethnic Delta (DEL) + based on Hoover (1941) and Duncan et al. (1961; + LC:60007089), (11) an index of spatial proximity (SP) based on White (1986) + and Blau (1977; ISBN-13:978-0-029-03660-0), and (12) the + aspatial racial/ethnic Isolation Index (xPx*) based on Lieberson (1981; + ISBN-13:978-1-032-53884-6) and Bell (1954) . Also using data + from the ACS-5 (2005-2009 onward), the package can retrieve the aspatial Gini + Index (G) based on Gini (1921) .
License: Apache License (>= 2.0) Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.2 +RoxygenNote: 7.3.2 Depends: R (>= 3.5.0) Imports: + car, dplyr, MASS, Matrix, @@ -52,11 +67,13 @@ Imports: stringr, tidycensus, tidyr, + tigris, + units, utils -Suggests: +Suggests: + DescTools, ggplot2, testthat, - tigris, R.rsp, spelling, usethis diff --git a/LICENSE.md b/LICENSE.md index fb3b0bd..9a76157 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -169,7 +169,7 @@ accepting any such warranty or additional liability. _END OF TERMS AND CONDITIONS_ - Copyright 2022 Ian D. Buller; NCI + Copyright 2024 Ian D. Buller; NCI Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/NAMESPACE b/NAMESPACE index ef49f1f..36af877 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,20 +2,31 @@ export(anthopolos) export(atkinson) +export(bell) +export(bemanian_beyer) export(bravo) export(duncan) export(gini) +export(hoover) export(krieger) +export(lieberson) export(messer) export(powell_wiley) +export(sudano) +export(white) +export(white_blau) import(dplyr) importFrom(MASS,ginv) importFrom(Matrix,sparseMatrix) +importFrom(car,logit) importFrom(psych,alpha) importFrom(psych,principal) +importFrom(sf,st_centroid) +importFrom(sf,st_distance) importFrom(sf,st_drop_geometry) importFrom(sf,st_geometry) importFrom(sf,st_intersects) +importFrom(sf,st_within) importFrom(stats,complete.cases) importFrom(stats,cor) importFrom(stats,cov2cor) @@ -30,4 +41,9 @@ importFrom(stringr,str_trim) importFrom(tidycensus,get_acs) importFrom(tidyr,pivot_longer) importFrom(tidyr,separate) +importFrom(tigris,combined_statistical_areas) +importFrom(tigris,core_based_statistical_areas) +importFrom(tigris,metro_divisions) +importFrom(units,drop_units) +importFrom(units,set_units) importFrom(utils,stack) diff --git a/NEWS.md b/NEWS.md index bb3632e..49bd22f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,30 +1,70 @@ # ndi 
(development version) -# ndi v0.1.4.9001 +## ndi v0.1.6.9002 ### New Features -* Added `atkinson()` function to compute the Atkinson Index (AI) based on [Atkinson (1970)](https://doi.org/10.1016/0022-0531(70)90039-6) for specified counties/tracts 2009 onward +* Added `hoover()` function to compute the aspatial racial/ethnic Delta (*DEL*) based on [Hoover (1941)](https://doi.org/10.1017/S0022050700052980) and Duncan et al. (1961; LC:60007089) +* Added `white_blau()` function to compute an index of spatial proximity (*SP*) based on [White (1986)](https://doi.org/10.2307/3644339) and Blau (1977; ISBN-13:978-0-029-03660-0) +* Added `lieberson()` function to compute the aspatial racial/ethnic Isolation Index (_xPx\*_) based on Lieberson (1981; ISBN-13:978-1-032-53884-6) and [Bell (1954)](https://doi.org/10.2307/2574118) +* Added `geo_large = 'cbsa'` for Core Based Statistical Areas, `geo_large = 'csa'` for Combined Statistical Areas, and `geo_large = 'metro'` for Metropolitan Divisions as the larger geographical unit in `atkinson()`, `bell()`, `bemanian_beyer()`, `duncan()`, `hoover()`, `lieberson()`, `sudano()`, `white()`, and `white_blau()` functions. +* Thank you for the feature suggestions, [Symielle Gaston](https://orcid.org/0000-0001-9495-1592) ### Updates +* `bell()` function computes the Interaction Index (Bell) not the Isolation Index as previously documented. Updated documentation throughout +* Fixed bug in `bell()`, `bemanian_beyer()`, `duncan()`, `sudano()`, and `white()` functions when a smaller geography contains n=0 total population, will assign a value of zero (0) in the internal calculation instead of NA +* Renamed *AI* as *A*, *DI* as *D*, *Gini* as *G*, and *II* as _xPy\*_ to align with the definitions from [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281). The output for `atkinson()` now produces `a` instead of `ai`. The output for `duncan()` now produces `d` instead of `di`. The output for `gini()` now produces `g` instead of `gini`.
The output for `bell()` now produces `xPy_star` instead of `II`. The internal functions `ai_fun()`, `di_fun()` and `ii_fun()` were renamed `a_fun()`, `d_fun()` and `xpy_star_fun()`, respectively. +* `tigris` and `units` are now Imports +* 'package.R' deprecated. Replaced with 'ndi-package.R' +* Re-formatted code and documentation throughout for consistent readability +* Updated documentation about value range of *V* (White) from `{0 to 1}` to `{-Inf to Inf}` +* Add examples for `hoover()` and `white_blau()` in vignette and README +* Reformatted functions for consistent internal structure +* Updated examples in vignette to showcase a larger variety of U.S. states +* Updated examples in functions to better describe the metrics +* Updated documentation formatting of metric names in all functions + +## ndi v0.1.5 + +### New Features +* None + +### Updates +* 'DescTools' is now Suggests to fix Rd cross-references NOTE +* Fixed 'lost braces in \itemize' NOTE for `anthopolos()`, `atkinson()`, `bell()`, `bemanian_beyer()`, `bravo()`, `duncan()`, `krieger()`, `messer()`, `powell_wiley()`, `sudano()`, and `white()` functions +* Fixed 'Moved Permanently' content by replacing the old URL with the new URL +* Fixed citation for [Slotman et al. 
(2022)](https://doi.org/10.1016/j.dib.2022.108002) in CITATION + +## ndi v0.1.4 + +### New Features +* Added `atkinson()` function to compute the aspatial income or racial/ethnic Atkinson Index (*A*) based on [Atkinson (1970)](https://doi.org/10.1016/0022-0531(70)90039-6) for specified counties/tracts 2009 onward +* Added `bell()` function to compute the aspatial racial/ethnic Interaction Index (_xPy\*_) based on Shevky & Williams (1949; ISBN-13:978-0837156378) and [Bell (1954)](https://doi.org/10.2307/2574118) +* Added `white()` function to compute the aspatial racial/ethnic Correlation Ratio (*V*) based on [Bell (1954)](https://doi.org/10.2307/2574118) and [White (1986)](https://doi.org/10.2307/3644339) +* Added `sudano()` function to compute the aspatial racial/ethnic Location Quotient (*LQ*) based on [Merton (1939)](https://doi.org/10.2307/2084686) and [Sudano et al. (2013)](https://doi.org/10.1016/j.healthplace.2012.09.015) +* Added `bemanian_beyer()` function to compute the aspatial racial/ethnic Local Exposure and Isolation (*LEx/Is*) metric based on [Bemanian & Beyer (2017)](https://doi.org/10.1158/1055-9965.EPI-16-0926) + +### Updates +* `car` is now Imports * Fixed bug in reverse dependency check failure for `anthopolos()` and `bravo()` functions removing `returnValue()` when data are not missing -* Thank you, [Roger Bivand](https://github.com/rsbivand), for the catch. Relates to [ndi Issue #5](https://github.com/idblr/ndi/issues/5) +* Thank you, [Roger Bivand](https://github.com/rsbivand), for the catch. 
Relates to [*ndi* Issue #5](https://github.com/idblr/ndi/issues/5) * Updated `duncan()`, `gini()`, `krieger()`, `messer()`, and `powell_wiley()` for consistency in messaging when data are not missing -* Updated tests for `anthopolos()` and `bravo()` if `Sys.getenv("CENSUS_API_KEY") != ""` +* Updated tests for `anthopolos()` and `bravo()` if `Sys.getenv('CENSUS_API_KEY') != ''` * Added `omit_NAs` argument in `duncan()` function to choose if NA values will be included in its computation * In `duncan()` function, if any smaller geographic unit has zero counts the output for its larger geographic unit will be NA * Fixed bug in `duncan()` function for multiple `subgroup` and `subgroup_ref` selections * Updated documentation throughout * Added GitHub R-CMD-check +* Updated citation style for CITATION file -# ndi v0.1.3 +## ndi v0.1.3 ### New Features -* Added `duncan()` function to compute the Dissimilarity Index (DI) based on [Duncan & Duncan (1955)](https://doi.org/10.2307/2088328) for specified counties/tracts 2009 onward +* Added `duncan()` function to compute the Dissimilarity Index (*D*) based on [Duncan & Duncan (1955)](https://doi.org/10.2307/2088328) for specified counties/tracts 2009 onward * Thank you for the feature suggestion, [Jessica Madrigal](https://orcid.org/0000-0001-5303-5109) * Added 'utils.R' file with internal `di_fun()` function for `duncan()` function ### Updates -* Fixed bug in `bravo()` function where ACS-5 data (2005-2009) are from the "B15002" question and "B06009" after +* Fixed bug in `bravo()` function where ACS-5 data (2005-2009) are from the 'B15002' question and 'B06009' after * Fixed bug in missingness warning for all metrics * `utils` is now Imports * Updated vignette and README with new features @@ -33,21 +73,21 @@ * Updated CITATION with new citation for the additional metric * Updated maintainer contact information -# ndi v0.1.2 +## ndi v0.1.2 ### New Features -* Added `krieger()` function to compute the Index of Concentration at 
the Extremes (ICE) based on [Feldman et al. (2015)](https://www.doi.org/10.1136/jech-2015-205728) and [Krieger et al. (2016)](https://www.doi.org/10.2105/AJPH.2015.302955) for specified counties/tracts 2009 onward +* Added `krieger()` function to compute the Index of Concentration at the Extremes (*ICE*) based on [Feldman et al. (2015)](https://doi.org/10.1136/jech-2015-205728) and [Krieger et al. (2016)](https://doi.org/10.2105/AJPH.2015.302955) for specified counties/tracts 2009 onward * Thank you for the feature suggestion, [David Berrigan](https://orcid.org/0000-0002-5333-179X) -* Added `df` argument for the `messer()` and `powell_wiley()` functions to specify a pre-formatted dataset input for the NDI computation +* Added `df` argument for the `messer()` and `powell_wiley()` functions to specify a pre-formatted data set input for the NDI computation * Added `round_output` argument for the `messer()` and `powell_wiley()` functions to provide raw output as the default and rounded output as optional. 
* Thank you for the suggested enhancements, [Chris Prener](https://github.com/chris-prener) -* Added `DCtracts2020` a testing dataset for the `ndi` package and its documentation +* Added `DCtracts2020` a testing data set for the *ndi* package and its documentation ### Updates * Fixed bug in `powell_wiley()` function where the internal PCA will now run properly if only one factor has an eigenvalue above 1 * Optimized the code to calculate missingness in all functions * Thank you for the suggested bug fixes, [Jacob Englert](https://github.com/jacobenglert) -* Fixed bug in `powell_wiley()` function where "PctNoPhone" before 2015 is "DP04_0074PE" and "DP04_0075PE" after +* Fixed bug in `powell_wiley()` function where 'PctNoPhone' before 2015 is 'DP04_0074PE' and 'DP04_0075PE' after * Thank you for alerting this issue, [Jessica Gleason](https://orcid.org/0000-0001-9877-7931) * Relaxed `year` argument in functions to include any year after 2009 or 2010 for the indices * Cleaned-up output formatting in functions @@ -59,12 +99,12 @@ * Updated 'package.R' with new details * Updated CITATION with new citations for the additional metric -# ndi v0.1.1 +## ndi v0.1.1 ### New Features -* Added `anthopolos()` function to compute the Racial Isolation Index (RI) based on based on [Anthopolos et al. (2011)](https://www.doi.org/10.1016/j.sste.2011.06.002) for specified counties/tracts 2009 onward -* Added `bravo()` function to compute the Educational Isolation Index (EI) based on based on [Bravo et al. (2021)](https://www.doi.org/10.3390/ijerph18179384) for specified counties/tracts 2009 onward -* Added `gini()` function to retrieve the Gini Index based on [Gini (1921)](https://www.doi.org/10.2307/2223319) for specified counties/tracts 2009 onward +* Added `anthopolos()` function to compute the Racial Isolation Index (*RI*) based on [Anthopolos et al.
(2011)](https://doi.org/10.1016/j.sste.2011.06.002) for specified counties/tracts 2009 onward +* Added `bravo()` function to compute the Educational Isolation Index (*EI*) based on [Bravo et al. (2021)](https://doi.org/10.3390/ijerph18179384) for specified counties/tracts 2009 onward +* Added `gini()` function to retrieve the Gini Index (*G*) based on [Gini (1921)](https://doi.org/10.2307/2223319) for specified counties/tracts 2009 onward * Thank you for the feature suggestions, [Jessica Madrigal](https://orcid.org/0000-0001-5303-5109) ### Updates @@ -75,8 +115,8 @@ * Updated 'package.R' with new details and section * Updated CITATION with new citations for the additional metrics -# ndi v0.1.0 +## ndi v0.1.0 * Fixed invalid URL and typos in package README.md -# ndi v0.0.1 +## ndi v0.0.1 * Initial CRAN submission diff --git a/R/DCtracts2020.R b/R/DCtracts2020.R index d085901..1e466bf 100644 --- a/R/DCtracts2020.R +++ b/R/DCtracts2020.R @@ -32,4 +32,4 @@ #' head(DCtracts2020) #' #' @source \url{https://github.com/idblr/ndi/blob/master/README.md} -"DCtracts2020" +'DCtracts2020' diff --git a/R/anthopolos.R b/R/anthopolos.R index f11c146..1d54da1 100644 --- a/R/anthopolos.R +++ b/R/anthopolos.R @@ -1,50 +1,50 @@ -#' Racial Isolation Index based on Anthopolos et al. (2011) -#' +#' Racial Isolation Index based on Anthopolos et al. (2011) +#' #' Compute the spatial Racial Isolation Index (Anthopolos) of selected subgroup(s). #' -#' @param geo Character string specifying the geography of the data either census tracts \code{geo = "tract"} (the default) or counties \code{geo = "county"}. +#' @param geo Character string specifying the geography of the data either census tracts \code{geo = 'tract'} (the default) or counties \code{geo = 'county'}. #' @param year Numeric. The year to compute the estimate. The default is 2020, and the years 2009 onward are currently available. #' @param subgroup Character string specifying the racial/ethnic subgroup(s).
See Details for available choices. #' @param quiet Logical. If TRUE, will display messages about potential missing census information. The default is FALSE. #' @param ... Arguments passed to \code{\link[tidycensus]{get_acs}} to select state, county, and other arguments for census characteristics #' -#' @details This function will compute the spatial Racial Isolation Index (RI) of U.S. census tracts or counties for a specified geographical extent (e.g., the entire U.S. or a single state) based on Anthopolos et al. (2011) \doi{10.1016/j.sste.2011.06.002} who originally designed the metric for the racial isolation of non-Hispanic Black individuals. This function provides the computation of RI for any of the U.S. Census Bureau race/ethnicity subgroups (including Hispanic and non-Hispanic individuals). -#' +#' @details This function will compute the spatial Racial Isolation Index (\emph{RI}) of U.S. census tracts or counties for a specified geographical extent (e.g., the entire U.S. or a single state) based on Anthopolos et al. (2011) \doi{10.1016/j.sste.2011.06.002} who originally designed the metric for the racial isolation of non-Hispanic Black individuals. This function provides the computation of \emph{RI} for any of the U.S. Census Bureau race/ethnicity subgroups (including Hispanic and non-Hispanic individuals). +#' #' The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the geospatial computation. The yearly estimates are available for 2009 onward when ACS-5 data are available but are available from other U.S. Census Bureau surveys. The twenty racial/ethnic subgroups (U.S. 
Census Bureau definitions) are: #' \itemize{ -#' \item{B03002_002: }{not Hispanic or Latino "NHoL"} -#' \item{B03002_003: }{not Hispanic or Latino, white alone "NHoLW"} -#' \item{B03002_004: }{not Hispanic or Latino, Black or African American alone "NHoLB"} -#' \item{B03002_005: }{not Hispanic or Latino, American Indian and Alaska Native alone "NHoLAIAN"} -#' \item{B03002_006: }{not Hispanic or Latino, Asian alone "NHoLA"} -#' \item{B03002_007: }{not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone "NHoLNHOPI"} -#' \item{B03002_008: }{not Hispanic or Latino, Some other race alone "NHoLSOR"} -#' \item{B03002_009: }{not Hispanic or Latino, Two or more races "NHoLTOMR"} -#' \item{B03002_010: }{not Hispanic or Latino, Two races including Some other race "NHoLTRiSOR"} -#' \item{B03002_011: }{not Hispanic or Latino, Two races excluding Some other race, and three or more races "NHoLTReSOR"} -#' \item{B03002_012: }{Hispanic or Latino "HoL"} -#' \item{B03002_013: }{Hispanic or Latino, white alone "HoLW"} -#' \item{B03002_014: }{Hispanic or Latino, Black or African American alone "HoLB"} -#' \item{B03002_015: }{Hispanic or Latino, American Indian and Alaska Native alone "HoLAIAN"} -#' \item{B03002_016: }{Hispanic or Latino, Asian alone "HoLA"} -#' \item{B03002_017: }{Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone "HoLNHOPI"} -#' \item{B03002_018: }{Hispanic or Latino, Some other race alone "HoLSOR"} -#' \item{B03002_019: }{Hispanic or Latino, Two or more races "HoLTOMR"} -#' \item{B03002_020: }{Hispanic or Latino, Two races including Some other race "HoLTRiSOR"} -#' \item{B03002_021: }{Hispanic or Latino, Two races excluding Some other race, and three or more races "HoLTReSOR"} +#' \item \strong{B03002_002}: not Hispanic or Latino \code{'NHoL'} +#' \item \strong{B03002_003}: not Hispanic or Latino, white alone\code{'NHoLW'} +#' \item \strong{B03002_004}: not Hispanic or Latino, Black or African American alone \code{'NHoLB'} +#' \item 
\strong{B03002_005}: not Hispanic or Latino, American Indian and Alaska Native alone \code{'NHoLAIAN'} +#' \item \strong{B03002_006}: not Hispanic or Latino, Asian alone \code{'NHoLA'} +#' \item \strong{B03002_007}: not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'NHoLNHOPI'} +#' \item \strong{B03002_008}: not Hispanic or Latino, Some other race alone \code{'NHoLSOR'} +#' \item \strong{B03002_009}: not Hispanic or Latino, Two or more races \code{'NHoLTOMR'} +#' \item \strong{B03002_010}: not Hispanic or Latino, Two races including Some other race \code{'NHoLTRiSOR'} +#' \item \strong{B03002_011}: not Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'NHoLTReSOR'} +#' \item \strong{B03002_012}: Hispanic or Latino \code{'HoL'} +#' \item \strong{B03002_013}: Hispanic or Latino, white alone \code{'HoLW'} +#' \item \strong{B03002_014}: Hispanic or Latino, Black or African American alone \code{'HoLB'} +#' \item \strong{B03002_015}: Hispanic or Latino, American Indian and Alaska Native alone \code{'HoLAIAN'} +#' \item \strong{B03002_016}: Hispanic or Latino, Asian alone \code{'HoLA'} +#' \item \strong{B03002_017}: Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'HoLNHOPI'} +#' \item \strong{B03002_018}: Hispanic or Latino, Some other race alone \code{'HoLSOR'} +#' \item \strong{B03002_019}: Hispanic or Latino, Two or more races \code{'HoLTOMR'} +#' \item \strong{B03002_020}: Hispanic or Latino, Two races including Some other race \code{'HoLTRiSOR'} +#' \item \strong{B03002_021}: Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'HoLTReSOR'} #' } -#' -#' Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. 
NOTE: Current version does not correct for edge effects (e.g., census geographies along the specified spatial extent border, coastline, or U.S.-Mexico / U.S.-Canada border) may have few neighboring census geographies, and RI values in these census geographies may be unstable. A stop-gap solution for the former source of edge effect is to compute the RI for neighboring census geographies (i.e., the states bordering a study area of interest) and then use the estimates of the study area of interest. -#' -#' A census geography (and its neighbors) that has nearly all of its population who identify with the specified race/ethnicity subgroup(s) (e.g., non-Hispanic or Latino, Black or African American alone) will have an RI value close to 1. In contrast, a census geography (and its neighbors) that has nearly none of its population who identify with the specified race/ethnicity subgroup(s) (e.g., not non-Hispanic or Latino, Black or African American alone) will have an RI value close to 0. -#' +#' +#' Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. NOTE: Current version does not correct for edge effects (e.g., census geographies along the specified spatial extent border, coastline, or U.S.-Mexico / U.S.-Canada border) may have few neighboring census geographies, and \emph{RI} values in these census geographies may be unstable. A stop-gap solution for the former source of edge effect is to compute the \emph{RI} for neighboring census geographies (i.e., the states bordering a study area of interest) and then use the estimates of the study area of interest. +#' +#' A census geography (and its neighbors) that has nearly all of its population who identify with the specified race/ethnicity subgroup(s) (e.g., non-Hispanic or Latino, Black or African American alone) will have an \emph{RI} value close to 1. 
In contrast, a census geography (and its neighbors) that has nearly none of its population who identify with the specified race/ethnicity subgroup(s) (e.g., not non-Hispanic or Latino, Black or African American alone) will have an \emph{RI} value close to 0. +#' #' @return An object of class 'list'. This is a named list with the following components: -#' +#' #' \describe{ -#' \item{\code{ri}}{An object of class 'tbl' for the GEOID, name, RI, and raw census values of specified census geographies.} -#' \item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute RI.} +#' \item{\code{ri}}{An object of class 'tbl' for the GEOID, name, \emph{RI}, and raw census values of specified census geographies.} +#' \item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute \emph{RI}.} #' } -#' +#' #' @import dplyr #' @importFrom Matrix sparseMatrix #' @importFrom sf st_drop_geometry st_geometry st_intersects @@ -52,159 +52,199 @@ #' @importFrom tidycensus get_acs #' @importFrom tidyr pivot_longer separate #' @export -#' +#' #' @seealso \code{\link[tidycensus]{get_acs}} for additional arguments for geographic extent selection (i.e., \code{state} and \code{county}). #' #' @examples #' \dontrun{ #' # Wrapped in \dontrun{} because these examples require a Census API key. -#' -#' # Tract-level metric (2020) -#' anthopolos(geo = "tract", state = "GA", year = 2020, subgroup = c("NHoLB", "HoLB")) -#' -#' # County-level metric (2020) -#' anthopolos(geo = "county", state = "GA", year = 2020, subgroup = c("NHoLB", "HoLB")) -#' +#' +#' # Racial Isolation Index of Black populations +#' ## of census tracts within Georgia, U.S.A., counties (2020) +#' anthopolos( +#' geo = 'tract', +#' state = 'GA', +#' year = 2020, +#' subgroup = c('NHoLB', 'HoLB') +#' ) +#' #' } -#' -anthopolos <- function(geo = "tract", year = 2020, subgroup, quiet = FALSE, ...) 
{ - - # Check arguments - match.arg(geo, choices = c("county", "tract")) - stopifnot(is.numeric(year), year >= 2009) # all variables available 2009 onward - match.arg(subgroup, several.ok = TRUE, - choices = c("NHoL", "NHoLW", "NHoLB", "NHoLAIAN", "NHoLA", "NHoLNHOPI", - "NHoLSOR", "NHoLTOMR", "NHoLTRiSOR", "NHoLTReSOR", - "HoL", "HoLW", "HoLB", "HoLAIAN", "HoLA", "HoLNHOPI", - "HoLSOR", "HoLTOMR", "HoLTRiSOR", "HoLTReSOR")) - - # Select census variables - vars <- c(TotalPop = "B03002_001", - NHoL = "B03002_002", - NHoLW = "B03002_003", - NHoLB = "B03002_004", - NHoLAIAN = "B03002_005", - NHoLA = "B03002_006", - NHoLNHOPI = "B03002_007", - NHoLSOR = "B03002_008", - NHoLTOMR = "B03002_009", - NHoLTRiSOR = "B03002_010", - NHoLTReSOR = "B03002_011", - HoL = "B03002_012", - HoLW = "B03002_013", - HoLB = "B03002_014", - HoLAIAN = "B03002_015", - HoLA = "B03002_016", - HoLNHOPI = "B03002_017", - HoLSOR = "B03002_018", - HoLTOMR = "B03002_019", - HoLTRiSOR = "B03002_020", - HoLTReSOR = "B03002_021") - - selected_vars <- vars[c("TotalPop", subgroup)] - out_names <- names(selected_vars) # save for output - prefix <- "subgroup" - suffix <- seq(1:length(subgroup)) - names(selected_vars) <- c("TotalPop", paste(prefix, suffix, sep = "")) - in_names <- paste(names(selected_vars), "E", sep = "") - - # Acquire RI variables and sf geometries - ri_vars <- suppressMessages(suppressWarnings(tidycensus::get_acs(geography = geo, - year = year, - output = "wide", - variables = selected_vars, - geometry = TRUE, ...))) - - - if (geo == "tract") { - ri_vars <- ri_vars %>% - tidyr::separate(NAME, into = c("tract", "county", "state"), sep = ",") %>% - dplyr::mutate(tract = gsub("[^0-9\\.]","", tract)) - } else { - ri_vars <- ri_vars %>% tidyr::separate(NAME, into = c("county", "state"), sep = ",") - } - - ri_vars <- ri_vars %>% - dplyr::mutate(subgroup = rowSums(sf::st_drop_geometry(ri_vars[ , in_names[-1]]))) - - # Compute RI - ## From Anthopolos et al. 
(2011) https://doi.org/10.1016/j.sste.2011.06.002 - ## RI_{im} = (Sigma_{j∈∂_{i}} w_{ij} * T_{jm}) / (Sigma_{j∈∂_{i}} w_{ij} * T_{j}) - ## Where: - ## ∂_{i} denotes the set of index units i and its neighbors - ## Given M mutually exclusive racial/ethnic subgroups, m indexes the subgroups of M - ## T_{i} denotes the total population in region i (TotalPop) - ## T_{im} denotes the population of the selected subgroup(s) (subgroup1, ...) - ## w_{ij} denotes a nXn first-order adjacency matrix, where n is the number of census geometries in the study area - ### and the entries of w_{ij} are set to 1 if a boundary is shared by region i and region j and zero otherwise - ### Entries of the main diagonal (since i∈∂_{i}, w_{ij} = w_{ii} when j = i) of w_{ij} are set to 1.5 - ### such that the weight of the index unit, i, is larger than the weights assigned to adjacent tracts - - ## Geospatial adjacency matrix (wij) - tmp <- sf::st_intersects(sf::st_geometry(ri_vars), sparse = TRUE) - names(tmp) <- as.character(seq_len(nrow(ri_vars))) - tmpL <- length(tmp) - tmpcounts <- unlist(Map(length, tmp)) - tmpi <- rep(1:tmpL, tmpcounts) - tmpj <- unlist(tmp) - wij <- Matrix::sparseMatrix(i = tmpi, j = tmpj, x = 1, dims = c(tmpL, tmpL)) - diag(wij) <- 1.5 - - ## Compute - ri_vars <- sf::st_drop_geometry(ri_vars) # drop geometries (can join back later) - RIim <- list() - for (i in 1:dim(wij)[1]){ - RIim[[i]] <- sum(as.matrix(wij[i, ])*ri_vars[ , "subgroup"]) / sum(as.matrix(wij[i, ])*ri_vars[, "TotalPopE"]) - } - ri_vars$RI <- unlist(RIim) - - # Warning for missingness of census characteristics - missingYN <- ri_vars[ , in_names] - names(missingYN) <- out_names - missingYN <- missingYN %>% - tidyr::pivot_longer(cols = dplyr::everything(), - names_to = "variable", - values_to = "val") %>% - dplyr::group_by(variable) %>% - dplyr::summarise(total = dplyr::n(), - n_missing = sum(is.na(val)), - percent_missing = paste0(round(mean(is.na(val)) * 100, 2), " %")) - - if (quiet == FALSE) { - # 
Warning for missing census data - if (sum(missingYN$n_missing) > 0) { - message("Warning: Missing census data") +#' +anthopolos <- function(geo = 'tract', + year = 2020, + subgroup, + quiet = FALSE, + ...) { + + # Check arguments + match.arg(geo, choices = c('county', 'tract')) + stopifnot(is.numeric(year), year >= 2009) # all variables available 2009 onward + match.arg( + subgroup, + several.ok = TRUE, + choices = c( + 'NHoL', + 'NHoLW', + 'NHoLB', + 'NHoLAIAN', + 'NHoLA', + 'NHoLNHOPI', + 'NHoLSOR', + 'NHoLTOMR', + 'NHoLTRiSOR', + 'NHoLTReSOR', + 'HoL', + 'HoLW', + 'HoLB', + 'HoLAIAN', + 'HoLA', + 'HoLNHOPI', + 'HoLSOR', + 'HoLTOMR', + 'HoLTRiSOR', + 'HoLTReSOR' + ) + ) + + # Select census variables + vars <- c( + TotalPop = 'B03002_001', + NHoL = 'B03002_002', + NHoLW = 'B03002_003', + NHoLB = 'B03002_004', + NHoLAIAN = 'B03002_005', + NHoLA = 'B03002_006', + NHoLNHOPI = 'B03002_007', + NHoLSOR = 'B03002_008', + NHoLTOMR = 'B03002_009', + NHoLTRiSOR = 'B03002_010', + NHoLTReSOR = 'B03002_011', + HoL = 'B03002_012', + HoLW = 'B03002_013', + HoLB = 'B03002_014', + HoLAIAN = 'B03002_015', + HoLA = 'B03002_016', + HoLNHOPI = 'B03002_017', + HoLSOR = 'B03002_018', + HoLTOMR = 'B03002_019', + HoLTRiSOR = 'B03002_020', + HoLTReSOR = 'B03002_021' + ) + + selected_vars <- vars[c('TotalPop', subgroup)] + out_names <- names(selected_vars) # save for output + prefix <- 'subgroup' + suffix <- seq(1:length(subgroup)) + names(selected_vars) <- c('TotalPop', paste0(prefix, suffix)) + in_names <- paste0(names(selected_vars), 'E') + + # Acquire RI variables and sf geometries + out_dat <- suppressMessages(suppressWarnings( + tidycensus::get_acs( + geography = geo, + year = year, + output = 'wide', + variables = selected_vars, + geometry = TRUE, + ... 
+ ) + )) + + if (geo == 'tract') { + out_dat <- out_dat %>% + tidyr::separate(NAME, into = c('tract', 'county', 'state'), sep = ',') %>% + dplyr::mutate(tract = gsub('[^0-9\\.]', '', tract)) + } else { + out_dat <- out_dat %>% + tidyr::separate(NAME, into = c('county', 'state'), sep = ',') } + + out_dat <- out_dat %>% + dplyr::mutate(subgroup = rowSums(sf::st_drop_geometry(out_dat[, in_names[-1]]))) + + # Compute RI + ## From Anthopolos et al. (2011) https://doi.org/10.1016/j.sste.2011.06.002 + ## RI_{im} = (Sigma_{j∈∂_{i}} w_{ij} * T_{jm}) / (Sigma_{j∈∂_{i}} w_{ij} * T_{j}) + ## Where: + ## ∂_{i} denotes the set of index units i and its neighbors + ## Given M mutually exclusive racial/ethnic subgroups, m indexes the subgroups of M + ## T_{i} denotes the total population in region i (TotalPop) + ## T_{im} denotes the population of the selected subgroup(s) (subgroup1, ...) + ## w_{ij} denotes a nXn first-order adjacency matrix, where n is the number of census geometries in the study area + ### and the entries of w_{ij} are set to 1 if a boundary is shared by region i and region j and zero otherwise + ### Entries of the main diagonal (since i∈∂_{i}, w_{ij} = w_{ii} when j = i) of w_{ij} are set to 1.5 + ### such that the weight of the index unit, i, is larger than the weights assigned to adjacent tracts + + ## Geospatial adjacency matrix (w_ij) + tmp <- out_dat %>% + sf::st_geometry() %>% + sf::st_intersects(sparse = TRUE) + names(tmp) <- as.character(seq_len(nrow(out_dat))) + tmp_L <- length(tmp) + tmp_counts <- unlist(Map(length, tmp)) + tmp_i <- rep(1:tmp_L, tmp_counts) + tmp_j <- unlist(tmp) + w_ij <- Matrix::sparseMatrix( + i = tmp_i, + j = tmp_j, + x = 1, + dims = c(tmp_L, tmp_L) + ) + diag(w_ij) <- 1.5 + + ## Compute + out_dat <- out_dat %>% + sf::st_drop_geometry() # drop geometries (can join back later) + out_tmp <- list() + for (i in 1:dim(w_ij)[1]) { + out_tmp[[i]] <- sum(as.matrix(w_ij[i,]) * out_dat[, 'subgroup']) / + sum(as.matrix(w_ij[i,]) * out_dat[, 
'TotalPopE']) + } + out_dat$RI <- unlist(out_tmp) + + # Warning for missingness of census characteristics + missingYN <- out_dat[, in_names] + names(missingYN) <- out_names + missingYN <- missingYN %>% + tidyr::pivot_longer( + cols = dplyr::everything(), + names_to = 'variable', + values_to = 'val' + ) %>% + dplyr::group_by(variable) %>% + dplyr::summarise( + total = dplyr::n(), + n_missing = sum(is.na(val)), + percent_missing = paste0(round(mean(is.na(val)) * 100, 2), ' %') + ) + + if (quiet == FALSE) { + # Warning for missing census data + if (sum(missingYN$n_missing) > 0) { + message('Warning: Missing census data') + } + } + + # Format output + if (geo == 'tract') { + out <- out_dat %>% + dplyr::select(c('GEOID', 'state', 'county', 'tract', 'RI', dplyr::all_of(in_names))) + names(out) <- c('GEOID', 'state', 'county', 'tract', 'RI', out_names) + } else { + out <- out_dat %>% + dplyr::select(c('GEOID', 'state', 'county', 'RI', dplyr::all_of(in_names))) + names(out) <- c('GEOID', 'state', 'county', 'RI', out_names) + } + + out <- out %>% + dplyr::mutate( + state = stringr::str_trim(state), + county = stringr::str_trim(county) + ) %>% + dplyr::arrange(GEOID) %>% + dplyr::as_tibble() + + out <- list(ri = out, missing = missingYN) + + return(out) } - - # Format output - if (geo == "tract") { - ri <- ri_vars %>% - dplyr::select(c("GEOID", - "state", - "county", - "tract", - "RI", - dplyr::all_of(in_names))) - names(ri) <- c("GEOID", "state", "county", "tract", "RI", out_names) - } else { - ri <- ri_vars %>% - dplyr::select(c("GEOID", - "state", - "county", - "RI", - dplyr::all_of(in_names))) - names(ri) <- c("GEOID", "state", "county", "RI", out_names) - } - - ri <- ri %>% - dplyr::mutate(state = stringr::str_trim(state), - county = stringr::str_trim(county)) %>% - dplyr::arrange(GEOID) %>% - dplyr::as_tibble() - - out <- list(ri = ri, - missing = missingYN) - - return(out) -} diff --git a/R/atkinson.R b/R/atkinson.R index 23ae8a1..5d38eb7 100644 --- a/R/atkinson.R +++ 
b/R/atkinson.R @@ -1,9 +1,9 @@ -#' Atkinson Index based on Atkinson (1970) -#' +#' Atkinson Index based on Atkinson (1970) +#' #' Compute the aspatial Atkinson Index of income or selected racial/ethnic subgroup(s) and U.S. geographies. #' -#' @param geo_large Character string specifying the larger geographical unit of the data. The default is counties \code{geo_large = "county"}. -#' @param geo_small Character string specifying the smaller geographical unit of the data. The default is census tracts \code{geo_large = "tract"}. +#' @param geo_large Character string specifying the larger geographical unit of the data. The default is counties \code{geo_large = 'county'}. +#' @param geo_small Character string specifying the smaller geographical unit of the data. The default is census tracts \code{geo_small = 'tract'}. #' @param year Numeric. The year to compute the estimate. The default is 2020, and the years 2009 onward are currently available. #' @param subgroup Character string specifying the income or racial/ethnic subgroup(s) as the comparison population. See Details for available choices. #' @param epsilon Numerical. Shape parameter that denotes the aversion to inequality. Value must be between 0 and 1.0 (the default is 0.5). @@ -11,244 +11,394 @@ #' @param quiet Logical. If TRUE, will display messages about potential missing census information. The default is FALSE. #' @param ... Arguments passed to \code{\link[tidycensus]{get_acs}} to select state, county, and other arguments for census characteristics #' -#' @details This function will compute the aspatial Atkinson Index (AI) of income or selected racial/ethnic subgroups and U.S. geographies for a specified geographical extent (e.g., the entire U.S. or a single state) based on Atkinson (1970) \doi{10.1016/0022-0531(70)90039-6}. This function provides the computation of AI for median household income and any of the U.S. Census Bureau race/ethnicity subgroups (including Hispanic and non-Hispanic individuals).
-#' -#' The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the aspatial computation. The yearly estimates are available for 2009 onward when ACS-5 data are available but are available from other U.S. Census Bureau surveys. When \code{subgroup = "MedHHInc"}, the metric will be computed for median household income ("B19013_001"). The twenty racial/ethnic subgroups (U.S. Census Bureau definitions) are: +#' @details This function will compute the aspatial Atkinson Index (\emph{A}) of income or selected racial/ethnic subgroups and U.S. geographies for a specified geographical extent (e.g., the entire U.S. or a single state) based on Atkinson (1970) \doi{10.1016/0022-0531(70)90039-6}. This function provides the computation of \emph{A} for median household income and any of the U.S. Census Bureau race/ethnicity subgroups (including Hispanic and non-Hispanic individuals). +#' +#' The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the aspatial computation. The yearly estimates are available for 2009 onward when ACS-5 data are available (2010 onward for \code{geo_large = 'cbsa'} and 2011 onward for \code{geo_large = 'csa'} or \code{geo_large = 'metro'}) but may be available from other U.S. Census Bureau surveys. When \code{subgroup = 'MedHHInc'}, the metric will be computed for median household income ('B19013_001'). The twenty racial/ethnic subgroups (U.S. 
Census Bureau definitions) are: #' \itemize{ -#' \item{B03002_002: }{not Hispanic or Latino "NHoL"} -#' \item{B03002_003: }{not Hispanic or Latino, white alone "NHoLW"} -#' \item{B03002_004: }{not Hispanic or Latino, Black or African American alone "NHoLB"} -#' \item{B03002_005: }{not Hispanic or Latino, American Indian and Alaska Native alone "NHoLAIAN"} -#' \item{B03002_006: }{not Hispanic or Latino, Asian alone "NHoLA"} -#' \item{B03002_007: }{not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone "NHoLNHOPI"} -#' \item{B03002_008: }{not Hispanic or Latino, Some other race alone "NHoLSOR"} -#' \item{B03002_009: }{not Hispanic or Latino, Two or more races "NHoLTOMR"} -#' \item{B03002_010: }{not Hispanic or Latino, Two races including Some other race "NHoLTRiSOR"} -#' \item{B03002_011: }{not Hispanic or Latino, Two races excluding Some other race, and three or more races "NHoLTReSOR"} -#' \item{B03002_012: }{Hispanic or Latino "HoL"} -#' \item{B03002_013: }{Hispanic or Latino, white alone "HoLW"} -#' \item{B03002_014: }{Hispanic or Latino, Black or African American alone "HoLB"} -#' \item{B03002_015: }{Hispanic or Latino, American Indian and Alaska Native alone "HoLAIAN"} -#' \item{B03002_016: }{Hispanic or Latino, Asian alone "HoLA"} -#' \item{B03002_017: }{Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone "HoLNHOPI"} -#' \item{B03002_018: }{Hispanic or Latino, Some other race alone "HoLSOR"} -#' \item{B03002_019: }{Hispanic or Latino, Two or more races "HoLTOMR"} -#' \item{B03002_020: }{Hispanic or Latino, Two races including Some other race "HoLTRiSOR"} -#' \item{B03002_021: }{Hispanic or Latino, Two races excluding Some other race, and three or more races "HoLTReSOR"} +#' \item \strong{B03002_002}: not Hispanic or Latino \code{'NHoL'} +#' \item \strong{B03002_003}: not Hispanic or Latino, white alone \code{'NHoLW'} +#' \item \strong{B03002_004}: not Hispanic or Latino, Black or African American alone \code{'NHoLB'} +#' 
\item \strong{B03002_005}: not Hispanic or Latino, American Indian and Alaska Native alone \code{'NHoLAIAN'} +#' \item \strong{B03002_006}: not Hispanic or Latino, Asian alone \code{'NHoLA'} +#' \item \strong{B03002_007}: not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'NHoLNHOPI'} +#' \item \strong{B03002_008}: not Hispanic or Latino, Some other race alone \code{'NHoLSOR'} +#' \item \strong{B03002_009}: not Hispanic or Latino, Two or more races \code{'NHoLTOMR'} +#' \item \strong{B03002_010}: not Hispanic or Latino, Two races including Some other race \code{'NHoLTRiSOR'} +#' \item \strong{B03002_011}: not Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'NHoLTReSOR'} +#' \item \strong{B03002_012}: Hispanic or Latino \code{'HoL'} +#' \item \strong{B03002_013}: Hispanic or Latino, white alone \code{'HoLW'} +#' \item \strong{B03002_014}: Hispanic or Latino, Black or African American alone \code{'HoLB'} +#' \item \strong{B03002_015}: Hispanic or Latino, American Indian and Alaska Native alone \code{'HoLAIAN'} +#' \item \strong{B03002_016}: Hispanic or Latino, Asian alone \code{'HoLA'} +#' \item \strong{B03002_017}: Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'HoLNHOPI'} +#' \item \strong{B03002_018}: Hispanic or Latino, Some other race alone \code{'HoLSOR'} +#' \item \strong{B03002_019}: Hispanic or Latino, Two or more races \code{'HoLTOMR'} +#' \item \strong{B03002_020}: Hispanic or Latino, Two races including Some other race \code{'HoLTRiSOR'} +#' \item \strong{B03002_021}: Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'HoLTReSOR'} #' } #' #' Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. 
-#' -#' AI is a measure of the evenness of residential inequality (e.g., racial/ethnic segregation) when comparing smaller geographical areas to larger ones within which the smaller geographical areas are located. The AI metric can range in value from 0 to 1 with smaller values indicating lower levels of inequality (e.g., less segregation). -#' -#' The \code{epsilon} argument that determines how to weight the increments to inequality contributed by different proportions of the Lorenz curve. A user must explicitly decide how heavily to weight smaller geographical units at different points on the Lorenz curve (i.e., whether the index should take greater account of differences among areas of over- or under-representation). The \code{epsilon} argument must have values between 0 and 1.0. For \code{0 <= epsilon < 0.5} or less "inequality-averse," smaller geographical units with a subgroup proportion smaller than the subgroup proportion of the larger geographical unit contribute more to inequality ("over-representation"). For \code{0.5 < epsilon <= 1.0} or more "inequality-averse," smaller geographical units with a subgroup proportion larger than the subgroup proportion of the larger geographical unit contribute more to inequality ("under-representation"). If \code{epsilon = 0.5} (the default), units of over- and under-representation contribute equally to the index. See Section 2.3 of Saint-Jacques et al. (2020) \doi{10.48550/arXiv.2002.05819} for one method to select \code{epsilon}. -#' -#' Larger geographies available include state \code{geo_large = "state"}, county \code{geo_large = "county"}, and census tract \code{geo_large = "tract"} levels. Smaller geographies available include, county \code{geo_small = "county"}, census tract \code{geo_small = "tract"}, and census block group \code{geo_small = "block group"} levels. 
If a larger geographical area is comprised of only one smaller geographical area (e.g., a U.S county contains only one census tract), then the AI value returned is NA. +#' +#' \emph{A} is a measure of the evenness of residential inequality (e.g., racial/ethnic segregation) when comparing smaller geographical areas to larger ones within which the smaller geographical areas are located. \emph{A} can range in value from 0 to 1 with smaller values indicating lower levels of inequality (e.g., less segregation). +#' +#' The \code{epsilon} argument determines how to weight the increments to inequality contributed by different proportions of the Lorenz curve. A user must explicitly decide how heavily to weight smaller geographical units at different points on the Lorenz curve (i.e., whether the index should take greater account of differences among areas of over- or under-representation). The \code{epsilon} argument must have values between 0 and 1.0. For \code{0 <= epsilon < 0.5} or less 'inequality-averse,' smaller geographical units with a subgroup proportion smaller than the subgroup proportion of the larger geographical unit contribute more to inequality ('over-representation'). For \code{0.5 < epsilon <= 1.0} or more 'inequality-averse,' smaller geographical units with a subgroup proportion larger than the subgroup proportion of the larger geographical unit contribute more to inequality ('under-representation'). If \code{epsilon = 0.5} (the default), units of over- and under-representation contribute equally to the index. See Section 2.3 of Saint-Jacques et al. (2020) \doi{10.48550/arXiv.2002.05819} for one method to select \code{epsilon}. +#' +#' Larger geographies available include state \code{geo_large = 'state'}, county \code{geo_large = 'county'}, census tract \code{geo_large = 'tract'}, Core Based Statistical Area \code{geo_large = 'cbsa'}, Combined Statistical Area \code{geo_large = 'csa'}, and Metropolitan Division \code{geo_large = 'metro'} levels.
Smaller geographies available include county \code{geo_small = 'county'}, census tract \code{geo_small = 'tract'}, and census block group \code{geo_small = 'block group'} levels. If a larger geographical area is comprised of only one smaller geographical area (e.g., a U.S. county contains only one census tract), then the \emph{A} value returned is NA. If the larger geographical unit is Combined Statistical Areas \code{geo_large = 'csa'} or Core Based Statistical Areas \code{geo_large = 'cbsa'}, only the smaller geographical units completely within a larger geographical unit are considered in the \emph{A} computation (see internal \code{\link[sf]{st_within}} function for more information). We recommend specifying all states within which the larger geographical unit of interest is located using the internal \code{state} argument to ensure all appropriate smaller geographical units are included in the \emph{A} computation. #' #' @return An object of class 'list'. This is a named list with the following components: -#' +#' #' \describe{ -#' \item{\code{ai}}{An object of class 'tbl' for the GEOID, name, and AI at specified larger census geographies.} -#' \item{\code{ai_data}}{An object of class 'tbl' for the raw census values at specified smaller census geographies.} -#' \item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute AI} +#' \item{\code{a}}{An object of class 'tbl' for the GEOID, name, and \emph{A} at specified larger census geographies.} +#' \item{\code{a_data}}{An object of class 'tbl' for the raw census values at specified smaller census geographies.} +#' \item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute \emph{A}.} #' } -#' +#' #' @import dplyr -#' @importFrom sf st_drop_geometry +#' @importFrom sf st_drop_geometry st_within #' @importFrom stats na.omit #' @importFrom tidycensus get_acs #'
@importFrom tidyr pivot_longer separate +#' @importFrom tigris combined_statistical_areas core_based_statistical_areas metro_divisions #' @importFrom utils stack #' @export -#' +#' #' @seealso \code{\link[tidycensus]{get_acs}} for additional arguments for geographic extent selection (i.e., \code{state} and \code{county}). #' #' @examples #' \dontrun{ #' # Wrapped in \dontrun{} because these examples require a Census API key. -#' -#' # Atkinson Index of non-Hispanic Black vs. non-Hispanic white populations -#' ## of census tracts within Georgia, U.S.A., counties (2020) -#' atkinson(geo_large = "county", geo_small = "tract", state = "GA", year = 2020, -#' subgroup = "NHoLB") -#' +#' +#' # Atkinson Index (a measure of evenness) of Black populations +#' ## of census tracts within counties in Georgia, U.S.A. (2020) +#' atkinson( +#' geo_large = 'county', +#' geo_small = 'tract', +#' state = 'GA', +#' year = 2020, +#' subgroup = c('NHoLB', 'HoLB') +#' ) +#' #' } -#' -atkinson <- function(geo_large = "county", geo_small = "tract", year = 2020, subgroup, epsilon = 0.5, omit_NAs = TRUE, quiet = FALSE, ...) 
{ - - # Check arguments - match.arg(geo_large, choices = c("state", "county", "tract")) - match.arg(geo_small, choices = c("county", "tract", "block group")) - stopifnot(is.numeric(year), year >= 2009) # all variables available 2009 onward - match.arg(subgroup, several.ok = TRUE, - choices = c("NHoL", "NHoLW", "NHoLB", "NHoLAIAN", "NHoLA", "NHoLNHOPI", - "NHoLSOR", "NHoLTOMR", "NHoLTRiSOR", "NHoLTReSOR", - "HoL", "HoLW", "HoLB", "HoLAIAN", "HoLA", "HoLNHOPI", - "HoLSOR", "HoLTOMR", "HoLTRiSOR", "HoLTReSOR", "MedHHInc")) - stopifnot(is.numeric(epsilon), epsilon >= 0 , epsilon <= 1) # values between 0 and 1 - - # Select census variables - vars <- c(NHoL = "B03002_002", - NHoLW = "B03002_003", - NHoLB = "B03002_004", - NHoLAIAN = "B03002_005", - NHoLA = "B03002_006", - NHoLNHOPI = "B03002_007", - NHoLSOR = "B03002_008", - NHoLTOMR = "B03002_009", - NHoLTRiSOR = "B03002_010", - NHoLTReSOR = "B03002_011", - HoL = "B03002_012", - HoLW = "B03002_013", - HoLB = "B03002_014", - HoLAIAN = "B03002_015", - HoLA = "B03002_016", - HoLNHOPI = "B03002_017", - HoLSOR = "B03002_018", - HoLTOMR = "B03002_019", - HoLTRiSOR = "B03002_020", - HoLTReSOR = "B03002_021", - MedHHInc = "B19013_001") - - selected_vars <- vars[subgroup] - out_names <- names(selected_vars) # save for output - in_subgroup <- paste(subgroup, "E", sep = "") - - # Acquire AI variables and sf geometries - ai_data <- suppressMessages(suppressWarnings(tidycensus::get_acs(geography = geo_small, - year = year, - output = "wide", - variables = selected_vars, - geometry = TRUE, - keep_geo_vars = TRUE, ...))) - - # Format output - if (geo_small == "county") { - ai_data <- sf::st_drop_geometry(ai_data) %>% - tidyr::separate(NAME.y, into = c("county", "state"), sep = ",") - } - if (geo_small == "tract") { - ai_data <- sf::st_drop_geometry(ai_data) %>% - tidyr::separate(NAME.y, into = c("tract", "county", "state"), sep = ",") %>% - dplyr::mutate(tract = gsub("[^0-9\\.]", "", tract)) - } - if (geo_small == "block group") { - 
ai_data <- sf::st_drop_geometry(ai_data) %>% - tidyr::separate(NAME.y, into = c("block.group", "tract", "county", "state"), sep = ",") %>% - dplyr::mutate(tract = gsub("[^0-9\\.]", "", tract), - block.group = gsub("[^0-9\\.]", "", block.group)) - } +#' +atkinson <- function(geo_large = 'county', + geo_small = 'tract', + year = 2020, + subgroup, + epsilon = 0.5, + omit_NAs = TRUE, + quiet = FALSE, + ...) { - # Grouping IDs for AI computation - if (geo_large == "tract") { - ai_vars <- ai_data %>% - dplyr::mutate(oid = paste(.$STATEFP, .$COUNTYFP, .$TRACTCE, sep = ""), - state = stringr::str_trim(state), - county = stringr::str_trim(county)) - } - if (geo_large == "county") { - ai_vars <- ai_data %>% - dplyr::mutate(oid = paste(.$STATEFP, .$COUNTYFP, sep = ""), - state = stringr::str_trim(state), - county = stringr::str_trim(county)) - } - if (geo_large == "state") { - ai_vars <- ai_data %>% - dplyr::mutate(oid = .$STATEFP, - state = stringr::str_trim(state)) - } - - # Count of racial/ethnic subgroup populations - ## Count of racial/ethnic subgroup population - if (length(in_subgroup) == 1) { - ai_vars <- ai_vars %>% - dplyr::mutate(subgroup = .[ , in_subgroup]) - } else { - ai_vars <- ai_vars %>% - dplyr::mutate(subgroup = rowSums(.[ , in_subgroup])) - } - - # Compute AI - ## From Atkinson (1970) https://doi.org/10.1016/0022-0531(70)90039-6 - ## A_{\epsilon}(x_{1},...,x_{n}) = \begin{Bmatrix} - ## 1 - (\frac{1}{n}\sum_{i=1}^{n}x_{i}^{1-\epsilon})^{1/(1-\epsilon)}/(\frac{1}{n}\sum_{i=1}^{n}x_{i}) & \mathrm{if\:} \epsilon \neq 1 \\ - ## 1 - (\prod_{i=1}^{n}x_{i})^{1/n}/(\frac{1}{n}\sum_{i=1}^{n}x_{i}) & \mathrm{if\:} \epsilon = 1 \\ - ## \end{Bmatrix} - ## Where the Atkinson index (A) is defined for a population subgroup count (x) of a given smaller geographical unit (i) for n smaller geographical units - ## and an inequality-aversion parameter (epsilon) - ## If denoting the Hölder mean (based on `Atkinson()` function in 'DescTools' package) by - ## 
M_{p}(x_{1},...,x_{n}) = \begin{Bmatrix} - ## (\frac{1}{n}\sum_{i=1}^{n}x_{i}^{p})^{1/p} & \mathrm{if\:} p \neq 0 \\ - ## (\prod_{i=1}^{n}x_{i})^{1/n} & \mathrm{if\:} p = 0 \\ - ## \end{Bmatrix} - ## then AI is - ## A_{\epsilon}(x_{1},...,x_{n}) = 1 - \frac{M_{1-\epsilon}(x_{1},...,x_{n})}{M_{1}(x_{1},...,x_{n})} - - ## Compute - AItmp <- ai_vars %>% - split(., f = list(ai_vars$oid)) %>% - lapply(., FUN = ai_fun, epsilon = epsilon, omit_NAs = omit_NAs) %>% - utils::stack(.) %>% - dplyr::mutate(AI = values, - oid = ind) %>% - dplyr::select(AI, oid) - - # Warning for missingness of census characteristics - missingYN <- as.data.frame(ai_vars[ , in_subgroup]) - names(missingYN) <- out_names - missingYN <- missingYN %>% - tidyr::pivot_longer(cols = dplyr::everything(), - names_to = "variable", - values_to = "val") %>% - dplyr::group_by(variable) %>% - dplyr::summarise(total = dplyr::n(), - n_missing = sum(is.na(val)), - percent_missing = paste0(round(mean(is.na(val)) * 100, 2), " %")) - - if (quiet == FALSE) { - # Warning for missing census data - if (sum(missingYN$n_missing) > 0) { - message("Warning: Missing census data") + # Check arguments + match.arg(geo_large, choices = c('state', 'county', 'tract', 'cbsa', 'csa', 'metro')) + match.arg(geo_small, choices = c('county', 'tract', 'block group')) + stopifnot(is.numeric(year), year >= 2009) # all variables available 2009 onward + match.arg( + subgroup, + several.ok = TRUE, + choices = c( + 'NHoL', + 'NHoLW', + 'NHoLB', + 'NHoLAIAN', + 'NHoLA', + 'NHoLNHOPI', + 'NHoLSOR', + 'NHoLTOMR', + 'NHoLTRiSOR', + 'NHoLTReSOR', + 'HoL', + 'HoLW', + 'HoLB', + 'HoLAIAN', + 'HoLA', + 'HoLNHOPI', + 'HoLSOR', + 'HoLTOMR', + 'HoLTRiSOR', + 'HoLTReSOR', + 'MedHHInc' + ) + ) + stopifnot(is.numeric(epsilon), epsilon >= 0 , epsilon <= 1) # values between 0 and 1 + + # Select census variables + vars <- c( + NHoL = 'B03002_002', + NHoLW = 'B03002_003', + NHoLB = 'B03002_004', + NHoLAIAN = 'B03002_005', + NHoLA = 'B03002_006', + NHoLNHOPI = 
'B03002_007', + NHoLSOR = 'B03002_008', + NHoLTOMR = 'B03002_009', + NHoLTRiSOR = 'B03002_010', + NHoLTReSOR = 'B03002_011', + HoL = 'B03002_012', + HoLW = 'B03002_013', + HoLB = 'B03002_014', + HoLAIAN = 'B03002_015', + HoLA = 'B03002_016', + HoLNHOPI = 'B03002_017', + HoLSOR = 'B03002_018', + HoLTOMR = 'B03002_019', + HoLTRiSOR = 'B03002_020', + HoLTReSOR = 'B03002_021', + MedHHInc = 'B19013_001' + ) + + selected_vars <- vars[subgroup] + out_names <- names(selected_vars) # save for output + in_subgroup <- paste0(subgroup, 'E') + + # Acquire A variables and sf geometries + out_dat <- suppressMessages(suppressWarnings( + tidycensus::get_acs( + geography = geo_small, + year = year, + output = 'wide', + variables = selected_vars, + geometry = TRUE, + keep_geo_vars = TRUE, + ... + ) + )) + + # Format output + if (geo_small == 'county') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('county', 'state'), sep = ',') + } + if (geo_small == 'tract') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('tract', 'county', 'state'), sep = ',') %>% + dplyr::mutate(tract = gsub('[^0-9\\.]', '', tract)) + } + if (geo_small == 'block group') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('block.group', 'tract', 'county', 'state'), sep = ',') %>% + dplyr::mutate( + tract = gsub('[^0-9\\.]', '', tract), + block.group = gsub('[^0-9\\.]', '', block.group) + ) + } + + # Grouping IDs for A computation + if (geo_large == 'state') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = STATEFP, + state = stringr::str_trim(state) + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'county') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = paste0(STATEFP, COUNTYFP), + state = stringr::str_trim(state), + county = stringr::str_trim(county) + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'tract') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = paste0(STATEFP, COUNTYFP, TRACTCE), + state = stringr::str_trim(state), + county = 
stringr::str_trim(county) + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'cbsa') { + stopifnot(is.numeric(year), year >= 2010) # CBSAs only available 2010 onward + lgeom <- suppressMessages(suppressWarnings(tigris::core_based_statistical_areas(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 3] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + cbsa = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 4] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'csa') { + stopifnot(is.numeric(year), year >= 2011) # CSAs only available 2011 onward + lgeom <- suppressMessages(suppressWarnings(tigris::combined_statistical_areas(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 2] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + csa = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 3] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'metro') { + stopifnot(is.numeric(year), year >= 2011) # Metro Divisions only available 2011 onward + lgeom <- suppressMessages(suppressWarnings(tigris::metro_divisions(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 4] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + metro = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 5] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() 
+ ) %>% + sf::st_drop_geometry() } - } - # Format output - if (geo_large == "state") { - ai <- merge(ai_vars, AItmp) %>% - dplyr::select(oid, state, AI) %>% - unique(.) %>% - dplyr::mutate(GEOID = oid) %>% - dplyr::select(GEOID, state, AI) %>% - .[.$GEOID != "NANA", ] - } - if (geo_large == "county") { - ai <- merge(ai_vars, AItmp) %>% - dplyr::select(oid, state, county, AI) %>% - unique(.) %>% - dplyr::mutate(GEOID = oid) %>% - dplyr::select(GEOID, state, county, AI) %>% - .[.$GEOID != "NANA", ] - } - if (geo_large == "tract") { - ai <- merge(ai_vars, AItmp) %>% - dplyr::select(oid, state, county, tract, AI) %>% - unique(.) %>% - dplyr::mutate(GEOID = oid) %>% - dplyr::select(GEOID, state, county, tract, AI) %>% - .[.$GEOID != "NANA", ] + # Count of racial/ethnic subgroup populations + ## Count of racial/ethnic subgroup population + if (length(in_subgroup) == 1) { + out_dat <- out_dat %>% + dplyr::mutate(subgroup = .[, in_subgroup]) + } else { + out_dat <- out_dat %>% + dplyr::mutate(subgroup = rowSums(.[, in_subgroup])) + } + + # Compute A + ## From Atkinson (1970) https://doi.org/10.1016/0022-0531(70)90039-6 + ## A_{\epsilon}(x_{1},...,x_{n}) = \begin{Bmatrix} + ## 1 - (\frac{1}{n}\sum_{i=1}^{n}x_{i}^{1-\epsilon})^{1/(1-\epsilon)}/(\frac{1}{n}\sum_{i=1}^{n}x_{i}) & \mathrm{if\:} \epsilon \neq 1 \\ + ## 1 - (\prod_{i=1}^{n}x_{i})^{1/n}/(\frac{1}{n}\sum_{i=1}^{n}x_{i}) & \mathrm{if\:} \epsilon = 1 \\ + ## \end{Bmatrix} + ## Where the Atkinson index (A) is defined for a population subgroup count (x) of a given smaller geographical unit (i) for n smaller geographical units + ## and an inequality-aversion parameter (epsilon) + ## If denoting the Hölder mean (based on `Atkinson()` function in 'DescTools' package) by + ## M_{p}(x_{1},...,x_{n}) = \begin{Bmatrix} + ## (\frac{1}{n}\sum_{i=1}^{n}x_{i}^{p})^{1/p} & \mathrm{if\:} p \neq 0 \\ + ## (\prod_{i=1}^{n}x_{i})^{1/n} & \mathrm{if\:} p = 0 \\ + ## \end{Bmatrix} + ## then A is + ## A_{\epsilon}(x_{1},...,x_{n}) = 1 - 
\frac{M_{1-\epsilon}(x_{1},...,x_{n})}{M_{1}(x_{1},...,x_{n})} + + ## Compute + out_tmp <- out_dat %>% + split(., f = list(out_dat$oid)) %>% + lapply(., FUN = a_fun, epsilon = epsilon, omit_NAs = omit_NAs) %>% + utils::stack(.) %>% + dplyr::mutate( + A = values, + oid = ind + ) %>% + dplyr::select(A, oid) + + # Warning for missingness of census characteristics + missingYN <- as.data.frame(out_dat[, in_subgroup]) + names(missingYN) <- out_names + missingYN <- missingYN %>% + tidyr::pivot_longer( + cols = dplyr::everything(), + names_to = 'variable', + values_to = 'val' + ) %>% + dplyr::group_by(variable) %>% + dplyr::summarise( + total = dplyr::n(), + n_missing = sum(is.na(val)), + percent_missing = paste0(round(mean(is.na(val)) * 100, 2), ' %') + ) + + if (quiet == FALSE) { + # Warning for missing census data + if (sum(missingYN$n_missing) > 0) { + message('Warning: Missing census data') + } + } + + # Format output + if (geo_large == 'state') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, state, A) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, state, A) %>% + .[.$GEOID != 'NANA',] + } + if (geo_large == 'county') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, state, county, A) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, state, county, A) %>% + .[.$GEOID != 'NANA',] + } + if (geo_large == 'tract') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, state, county, tract, A) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, state, county, tract, A) %>% + .[.$GEOID != 'NANA',] + } + if (geo_large == 'cbsa') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, cbsa, A) %>% + unique(.) 
%>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, cbsa, A) %>% + .[.$GEOID != 'NANA', ] %>% + dplyr::distinct(GEOID, .keep_all = TRUE) %>% + dplyr::filter(stats::complete.cases(.)) + } + if (geo_large == 'csa') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, csa, A) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, csa, A) %>% + .[.$GEOID != 'NANA', ] %>% + dplyr::distinct(GEOID, .keep_all = TRUE) %>% + dplyr::filter(stats::complete.cases(.)) + } + if (geo_large == 'metro') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, metro, A) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, metro, A) %>% + .[.$GEOID != 'NANA', ] %>% + dplyr::distinct(GEOID, .keep_all = TRUE) %>% + dplyr::filter(stats::complete.cases(.)) + } + + out <- out %>% + dplyr::arrange(GEOID) %>% + dplyr::as_tibble() + + out_dat <- out_dat %>% + dplyr::arrange(GEOID) %>% + dplyr::as_tibble() + + out <- list(a = out, a_data = out_dat, missing = missingYN) + + return(out) } - - ai <- ai %>% - dplyr::arrange(GEOID) %>% - dplyr::as_tibble() - - ai_data <- ai_data %>% - dplyr::arrange(GEOID) %>% - dplyr::as_tibble() - - out <- list(ai = ai, - ai_data = ai_data, - missing = missingYN) - - return(out) -} diff --git a/R/bell.R b/R/bell.R new file mode 100644 index 0000000..5abef97 --- /dev/null +++ b/R/bell.R @@ -0,0 +1,430 @@ +#' Interaction Index based on Shevky & Williams (1949) and Bell (1954) +#' +#' Compute the aspatial Interaction Index (Bell) of a selected racial/ethnic subgroup(s) and U.S. geographies. +#' +#' @param geo_large Character string specifying the larger geographical unit of the data. The default is counties \code{geo_large = 'county'}. +#' @param geo_small Character string specifying the smaller geographical unit of the data. The default is census tracts \code{geo_small = 'tract'}. +#' @param year Numeric. 
The year to compute the estimate. The default is 2020, and the years 2009 onward are currently available. +#' @param subgroup Character string specifying the racial/ethnic subgroup(s). See Details for available choices. +#' @param subgroup_ixn Character string specifying the racial/ethnic subgroup(s) as the interaction population. If the same as \code{subgroup}, will compute the simple isolation of the group. See Details for available choices. +#' @param omit_NAs Logical. If FALSE, will compute index for a larger geographical unit only if all of its smaller geographical units have values. The default is TRUE. +#' @param quiet Logical. If TRUE, will suppress messages about potential missing census information. The default is FALSE. +#' @param ... Arguments passed to \code{\link[tidycensus]{get_acs}} to select state, county, and other arguments for census characteristics +#' +#' @details This function will compute the aspatial Interaction Index (_xPy\*_) of selected racial/ethnic subgroups and U.S. geographies for a specified geographical extent (e.g., the entire U.S. or a single state) based on Shevky & Williams (1949; ISBN-13:978-0-837-15637-8) and Bell (1954) \doi{10.2307/2574118}. This function provides the computation of _xPy\*_ for any of the U.S. Census Bureau race/ethnicity subgroups (including Hispanic and non-Hispanic individuals). +#' +#' The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the aspatial computation. The yearly estimates are available for 2009 onward when ACS-5 data are available (2010 onward for \code{geo_large = 'cbsa'} and 2011 onward for \code{geo_large = 'csa'} or \code{geo_large = 'metro'}) but may be available from other U.S. Census Bureau surveys. The twenty racial/ethnic subgroups (U.S. 
Census Bureau definitions) are: +#' \itemize{ +#' \item \strong{B03002_002}: not Hispanic or Latino \code{'NHoL'} +#' \item \strong{B03002_003}: not Hispanic or Latino, white alone \code{'NHoLW'} +#' \item \strong{B03002_004}: not Hispanic or Latino, Black or African American alone \code{'NHoLB'} +#' \item \strong{B03002_005}: not Hispanic or Latino, American Indian and Alaska Native alone \code{'NHoLAIAN'} +#' \item \strong{B03002_006}: not Hispanic or Latino, Asian alone \code{'NHoLA'} +#' \item \strong{B03002_007}: not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'NHoLNHOPI'} +#' \item \strong{B03002_008}: not Hispanic or Latino, Some other race alone \code{'NHoLSOR'} +#' \item \strong{B03002_009}: not Hispanic or Latino, Two or more races \code{'NHoLTOMR'} +#' \item \strong{B03002_010}: not Hispanic or Latino, Two races including Some other race \code{'NHoLTRiSOR'} +#' \item \strong{B03002_011}: not Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'NHoLTReSOR'} +#' \item \strong{B03002_012}: Hispanic or Latino \code{'HoL'} +#' \item \strong{B03002_013}: Hispanic or Latino, white alone \code{'HoLW'} +#' \item \strong{B03002_014}: Hispanic or Latino, Black or African American alone \code{'HoLB'} +#' \item \strong{B03002_015}: Hispanic or Latino, American Indian and Alaska Native alone \code{'HoLAIAN'} +#' \item \strong{B03002_016}: Hispanic or Latino, Asian alone \code{'HoLA'} +#' \item \strong{B03002_017}: Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'HoLNHOPI'} +#' \item \strong{B03002_018}: Hispanic or Latino, Some other race alone \code{'HoLSOR'} +#' \item \strong{B03002_019}: Hispanic or Latino, Two or more races \code{'HoLTOMR'} +#' \item \strong{B03002_020}: Hispanic or Latino, Two races including Some other race \code{'HoLTRiSOR'} +#' \item \strong{B03002_021}: Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'HoLTReSOR'} 
+#' } +#' +#' Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. +#' +#' _xPy\*_ is some measure of the probability that a member of one subgroup(s) will meet or interact with a member of another subgroup(s) with higher values signifying higher probability of interaction (less isolation). _xPy\*_ can range in value from 0 to 1. +#' +#' Larger geographies available include state \code{geo_large = 'state'}, county \code{geo_large = 'county'}, census tract \code{geo_large = 'tract'}, Core Based Statistical Area \code{geo_large = 'cbsa'}, Combined Statistical Area \code{geo_large = 'csa'}, and Metropolitan Division \code{geo_large = 'metro'} levels. Smaller geographies available include county \code{geo_small = 'county'}, census tract \code{geo_small = 'tract'}, and census block group \code{geo_small = 'block group'} levels. If a larger geographical area is comprised of only one smaller geographical area (e.g., a U.S. county contains only one census tract), then the _xPy\*_ value returned is NA. If the larger geographical unit is Combined Statistical Areas \code{geo_large = 'csa'} or Core Based Statistical Areas \code{geo_large = 'cbsa'}, only the smaller geographical units completely within a larger geographical unit are considered in the _xPy\*_ computation (see internal \code{\link[sf]{st_within}} function for more information). We recommend specifying all states within which the larger geographical unit of interest is located using the internal \code{state} argument to ensure all appropriate smaller geographical units are included in the _xPy\*_ computation. +#' +#' @return An object of class 'list'. 
This is a named list with the following components: +#' +#' \describe{ +#' \item{\code{xpy_star}}{An object of class 'tbl' for the GEOID, name, and _xPy\*_ at specified larger census geographies.} +#' \item{\code{xpy_star_data}}{An object of class 'tbl' for the raw census values at specified smaller census geographies.} +#' \item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute _xPy\*_.} +#' } +#' +#' @import dplyr +#' @importFrom sf st_drop_geometry st_within +#' @importFrom stats complete.cases +#' @importFrom tidycensus get_acs +#' @importFrom tidyr pivot_longer separate +#' @importFrom tigris combined_statistical_areas core_based_statistical_areas metro_divisions +#' @importFrom utils stack +#' @export +#' +#' @seealso \code{\link[tidycensus]{get_acs}} for additional arguments for geographic extent selection (i.e., \code{state} and \code{county}). +#' +#' @examples +#' \dontrun{ +#' # Wrapped in \dontrun{} because these examples require a Census API key. +#' +#' # Interaction of non-Hispanic Black vs. non-Hispanic white populations +#' ## of census tracts within counties in Georgia, U.S.A. (2020) +#' bell( +#' geo_large = 'county', +#' geo_small = 'tract', +#' state = 'GA', +#' year = 2020, +#' subgroup = 'NHoLB', +#' subgroup_ixn = 'NHoLW' +#' ) +#' +#' } +#' +bell <- function(geo_large = 'county', + geo_small = 'tract', + year = 2020, + subgroup, + subgroup_ixn, + omit_NAs = TRUE, + quiet = FALSE, + ...) 
{ + + # Check arguments + match.arg(geo_large, choices = c('state', 'county', 'tract', 'cbsa', 'csa', 'metro')) + match.arg(geo_small, choices = c('county', 'tract', 'block group')) + stopifnot(is.numeric(year), year >= 2009) # all variables available 2009 onward + match.arg( + subgroup, + several.ok = TRUE, + choices = c( + 'NHoL', + 'NHoLW', + 'NHoLB', + 'NHoLAIAN', + 'NHoLA', + 'NHoLNHOPI', + 'NHoLSOR', + 'NHoLTOMR', + 'NHoLTRiSOR', + 'NHoLTReSOR', + 'HoL', + 'HoLW', + 'HoLB', + 'HoLAIAN', + 'HoLA', + 'HoLNHOPI', + 'HoLSOR', + 'HoLTOMR', + 'HoLTRiSOR', + 'HoLTReSOR' + ) + ) + match.arg( + subgroup_ixn, + several.ok = TRUE, + choices = c( + 'NHoL', + 'NHoLW', + 'NHoLB', + 'NHoLAIAN', + 'NHoLA', + 'NHoLNHOPI', + 'NHoLSOR', + 'NHoLTOMR', + 'NHoLTRiSOR', + 'NHoLTReSOR', + 'HoL', + 'HoLW', + 'HoLB', + 'HoLAIAN', + 'HoLA', + 'HoLNHOPI', + 'HoLSOR', + 'HoLTOMR', + 'HoLTRiSOR', + 'HoLTReSOR' + ) + ) + + # Select census variables + vars <- c( + TotalPop = 'B03002_001', + NHoL = 'B03002_002', + NHoLW = 'B03002_003', + NHoLB = 'B03002_004', + NHoLAIAN = 'B03002_005', + NHoLA = 'B03002_006', + NHoLNHOPI = 'B03002_007', + NHoLSOR = 'B03002_008', + NHoLTOMR = 'B03002_009', + NHoLTRiSOR = 'B03002_010', + NHoLTReSOR = 'B03002_011', + HoL = 'B03002_012', + HoLW = 'B03002_013', + HoLB = 'B03002_014', + HoLAIAN = 'B03002_015', + HoLA = 'B03002_016', + HoLNHOPI = 'B03002_017', + HoLSOR = 'B03002_018', + HoLTOMR = 'B03002_019', + HoLTRiSOR = 'B03002_020', + HoLTReSOR = 'B03002_021' + ) + + selected_vars <- vars[c('TotalPop', subgroup, subgroup_ixn)] + out_names <- names(selected_vars) # save for output + in_subgroup <- paste0(subgroup, 'E') + in_subgroup_ixn <- paste0(subgroup_ixn, 'E') + + # Acquire xPy* variables and sf geometries + out_dat <- suppressMessages(suppressWarnings( + tidycensus::get_acs( + geography = geo_small, + year = year, + output = 'wide', + variables = selected_vars, + geometry = TRUE, + keep_geo_vars = TRUE, + ... 
+ ) + )) + + # Format output + if (geo_small == 'county') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('county', 'state'), sep = ',') + } + if (geo_small == 'tract') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('tract', 'county', 'state'), sep = ',') %>% + dplyr::mutate(tract = gsub('[^0-9\\.]', '', tract)) + } + if (geo_small == 'block group') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('block.group', 'tract', 'county', 'state'), sep = ',') %>% + dplyr::mutate( + tract = gsub('[^0-9\\.]', '', tract), + block.group = gsub('[^0-9\\.]', '', block.group) + ) + } + + # Grouping IDs for xPy* computation + if (geo_large == 'state') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = STATEFP, + state = stringr::str_trim(state) + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'county') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = paste0(STATEFP, COUNTYFP), + state = stringr::str_trim(state), + county = stringr::str_trim(county) + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'tract') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = paste0(STATEFP, COUNTYFP, TRACTCE), + state = stringr::str_trim(state), + county = stringr::str_trim(county) + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'cbsa') { + stopifnot(is.numeric(year), year >= 2010) # CBSAs only available 2010 onward + lgeom <- suppressMessages(suppressWarnings(tigris::core_based_statistical_areas(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 3] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + cbsa = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 4] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'csa') { + stopifnot(is.numeric(year), year >= 2011) # 
CSAs only available 2011 onward + lgeom <- suppressMessages(suppressWarnings(tigris::combined_statistical_areas(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 2] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + csa = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 3] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'metro') { + stopifnot(is.numeric(year), year >= 2011) # Metro Divisions only available 2011 onward + lgeom <- suppressMessages(suppressWarnings(tigris::metro_divisions(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 4] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + metro = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 5] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) %>% + sf::st_drop_geometry() + } + + # Count of racial/ethnic subgroup populations + ## Count of racial/ethnic comparison subgroup population + if (length(in_subgroup) == 1) { + out_dat <- out_dat %>% + dplyr::mutate(subgroup = .[, in_subgroup]) + } else { + out_dat <- out_dat %>% + dplyr::mutate(subgroup = rowSums(.[, in_subgroup])) + } + ## Count of racial/ethnic interaction subgroup population + if (length(in_subgroup_ixn) == 1) { + out_dat <- out_dat %>% + dplyr::mutate(subgroup_ixn = .[, in_subgroup_ixn]) + } else { + out_dat <- out_dat %>% + dplyr::mutate(subgroup_ixn = rowSums(.[, in_subgroup_ixn])) + } + + # Compute xPy* + ## From Bell (1954) https://doi.org/10.2307/2574118 + ## _{x}P_{y}^* = \sum_{i=1}^{k} \left ( \frac{x_{i}}{X}\right )\left ( \frac{y_{i}}{n_{i}}\right ) 
+ ## Where for k geographical units i: + ## X denotes the total number of subgroup population in study (reference) area + ## x_{i} denotes the number of subgroup population X in geographical unit i + ## y_{i} denotes the number of subgroup population Y in geographical unit i + ## n_{i} denotes the total population of geographical unit i + ## If x_{i} = y_{i}, then computes the average isolation experienced by members of subgroup population X + + ## Compute + out_tmp <- out_dat %>% + split(., f = list(out_dat$oid)) %>% + lapply(., FUN = xpy_star_fun, omit_NAs = omit_NAs) %>% + utils::stack(.) %>% + dplyr::mutate( + xPy_star = values, + oid = ind + ) %>% + dplyr::select(xPy_star, oid) + + # Warning for missingness of census characteristics + missingYN <- out_dat[, c('TotalPopE', in_subgroup, in_subgroup_ixn)] + names(missingYN) <- out_names + missingYN <- missingYN %>% + tidyr::pivot_longer( + cols = dplyr::everything(), + names_to = 'variable', + values_to = 'val' + ) %>% + dplyr::group_by(variable) %>% + dplyr::summarise( + total = dplyr::n(), + n_missing = sum(is.na(val)), + percent_missing = paste0(round(mean(is.na(val)) * 100, 2), ' %') + ) + + if (quiet == FALSE) { + # Warning for missing census data + if (sum(missingYN$n_missing) > 0) { + message('Warning: Missing census data') + } + } + + # Format output + if (geo_large == 'state') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, state, xPy_star) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, state, xPy_star) %>% + .[.$GEOID != 'NANA',] + } + if (geo_large == 'county') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, state, county, xPy_star) %>% + unique(.) 
%>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, state, county, xPy_star) %>% + .[.$GEOID != 'NANA',] + } + if (geo_large == 'tract') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, state, county, tract, xPy_star) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, state, county, tract, xPy_star) %>% + .[.$GEOID != 'NANA',] + } + if (geo_large == 'cbsa') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, cbsa, xPy_star) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, cbsa, xPy_star) %>% + .[.$GEOID != 'NANA', ] %>% + dplyr::distinct(GEOID, .keep_all = TRUE) %>% + dplyr::filter(stats::complete.cases(.)) + } + if (geo_large == 'csa') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, csa, xPy_star) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, csa, xPy_star) %>% + .[.$GEOID != 'NANA', ] %>% + dplyr::distinct(GEOID, .keep_all = TRUE) %>% + dplyr::filter(stats::complete.cases(.)) + } + if (geo_large == 'metro') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, metro, xPy_star) %>% + unique(.) 
%>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, metro, xPy_star) %>% + .[.$GEOID != 'NANA', ] %>% + dplyr::distinct(GEOID, .keep_all = TRUE) %>% + dplyr::filter(stats::complete.cases(.)) + } + + out <- out %>% + dplyr::arrange(GEOID) %>% + dplyr::as_tibble() + + out_dat <- out_dat %>% + dplyr::arrange(GEOID) %>% + dplyr::as_tibble() + + out <- list(xpy_star = out, xpy_star_data = out_dat, missing = missingYN) + + return(out) + } diff --git a/R/bemanian_beyer.R b/R/bemanian_beyer.R new file mode 100644 index 0000000..646bf4e --- /dev/null +++ b/R/bemanian_beyer.R @@ -0,0 +1,408 @@ +#' Local Exposure and Isolation metric based on Bemanian & Beyer (2017) +#' +#' Compute the aspatial Local Exposure and Isolation (Bemanian & Beyer) metric of selected racial/ethnic subgroup(s) and U.S. geographies. +#' +#' @param geo_large Character string specifying the larger geographical unit of the data. The default is counties \code{geo_large = 'county'}. +#' @param geo_small Character string specifying the smaller geographical unit of the data. The default is census tracts \code{geo_small = 'tract'}. +#' @param year Numeric. The year to compute the estimate. The default is 2020, and the years 2009 onward are currently available. +#' @param subgroup Character string specifying the racial/ethnic subgroup(s). See Details for available choices. +#' @param subgroup_ixn Character string specifying the racial/ethnic subgroup(s) as the interaction population. If the same as \code{subgroup}, will compute the simple isolation of the group. See Details for available choices. +#' @param omit_NAs Logical. If FALSE, will compute index for a larger geographical unit only if all of its smaller geographical units have values. The default is TRUE. +#' @param quiet Logical. If TRUE, will suppress messages about potential missing census information. The default is FALSE. +#' @param ...
Arguments passed to \code{\link[tidycensus]{get_acs}} to select state, county, and other arguments for census characteristics +#' +#' @details This function will compute the aspatial Local Exposure and Isolation (\emph{LEx/Is}) metric of selected racial/ethnic subgroups and U.S. geographies for a specified geographical extent (e.g., the entire U.S. or a single state) based on Bemanian & Beyer (2017) \doi{10.1158/1055-9965.EPI-16-0926}. This function provides the computation of \emph{LEx/Is} for any of the U.S. Census Bureau race/ethnicity subgroups (including Hispanic and non-Hispanic individuals). +#' +#' The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the aspatial computation. The yearly estimates are available for 2009 onward when ACS-5 data are available (2010 onward for \code{geo_large = 'cbsa'} and 2011 onward for \code{geo_large = 'csa'} or \code{geo_large = 'metro'}) but may be available from other U.S. Census Bureau surveys. The twenty racial/ethnic subgroups (U.S. 
Census Bureau definitions) are: +#' \itemize{ +#' \item \strong{B03002_002}: not Hispanic or Latino \code{'NHoL'} +#' \item \strong{B03002_003}: not Hispanic or Latino, white alone \code{'NHoLW'} +#' \item \strong{B03002_004}: not Hispanic or Latino, Black or African American alone \code{'NHoLB'} +#' \item \strong{B03002_005}: not Hispanic or Latino, American Indian and Alaska Native alone \code{'NHoLAIAN'} +#' \item \strong{B03002_006}: not Hispanic or Latino, Asian alone \code{'NHoLA'} +#' \item \strong{B03002_007}: not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'NHoLNHOPI'} +#' \item \strong{B03002_008}: not Hispanic or Latino, Some other race alone \code{'NHoLSOR'} +#' \item \strong{B03002_009}: not Hispanic or Latino, Two or more races \code{'NHoLTOMR'} +#' \item \strong{B03002_010}: not Hispanic or Latino, Two races including Some other race \code{'NHoLTRiSOR'} +#' \item \strong{B03002_011}: not Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'NHoLTReSOR'} +#' \item \strong{B03002_012}: Hispanic or Latino \code{'HoL'} +#' \item \strong{B03002_013}: Hispanic or Latino, white alone \code{'HoLW'} +#' \item \strong{B03002_014}: Hispanic or Latino, Black or African American alone \code{'HoLB'} +#' \item \strong{B03002_015}: Hispanic or Latino, American Indian and Alaska Native alone \code{'HoLAIAN'} +#' \item \strong{B03002_016}: Hispanic or Latino, Asian alone \code{'HoLA'} +#' \item \strong{B03002_017}: Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'HoLNHOPI'} +#' \item \strong{B03002_018}: Hispanic or Latino, Some other race alone \code{'HoLSOR'} +#' \item \strong{B03002_019}: Hispanic or Latino, Two or more races \code{'HoLTOMR'} +#' \item \strong{B03002_020}: Hispanic or Latino, Two races including Some other race \code{'HoLTRiSOR'} +#' \item \strong{B03002_021}: Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'HoLTReSOR'} 
+#' } +#' +#' Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. +#' +#' \emph{LEx/Is} is a measure of the probability that two individuals living within a specific smaller geography (e.g., census tract) of either different (i.e., exposure) or the same (i.e., isolation) racial/ethnic subgroup(s) will interact, assuming that individuals within a smaller geography are randomly mixed. \emph{LEx/Is} is standardized with a logit transformation and centered against an expected case that all races/ethnicities are evenly distributed across a larger geography. (Note: will adjust data by 0.025 if probabilities are zero, one, or undefined. The output will include a warning if adjusted. See \code{\link[car]{logit}} for additional details.) +#' +#' \emph{LEx/Is} can range from negative infinity to infinity. If \emph{LEx/Is} is zero then the estimated probability of the interaction between two people of the given subgroup(s) within a smaller geography is equal to the expected probability if the subgroup(s) were perfectly mixed in the larger geography. If \emph{LEx/Is} is greater than zero then the interaction is more likely to occur within the smaller geography than in the larger geography, and if \emph{LEx/Is} is less than zero then the interaction is less likely to occur within the smaller geography than in the larger geography. Note: the exponentiation of each \emph{LEx/Is} metric results in the odds ratio of the specific exposure or isolation of interest in a smaller geography relative to the larger geography. +#' +#' Larger geographies available include state \code{geo_large = 'state'}, county \code{geo_large = 'county'}, census tract \code{geo_large = 'tract'}, Core Based Statistical Area \code{geo_large = 'cbsa'}, Combined Statistical Area \code{geo_large = 'csa'}, and Metropolitan Division \code{geo_large = 'metro'} levels. 
Smaller geographies available include county \code{geo_small = 'county'}, census tract \code{geo_small = 'tract'}, and census block group \code{geo_small = 'block group'} levels. If a larger geographical area is comprised of only one smaller geographical area (e.g., a U.S. county contains only one census tract), then the \emph{LEx/Is} value returned is NA. If the larger geographical unit is Combined Statistical Areas \code{geo_large = 'csa'}, Core Based Statistical Areas \code{geo_large = 'cbsa'}, or Metropolitan Divisions \code{geo_large = 'metro'}, only the smaller geographical units completely within a larger geographical unit are considered in the \emph{LEx/Is} computation (see internal \code{\link[sf]{st_within}} function for more information). We recommend specifying all states within which the larger geographical unit of interest is located using the internal \code{state} argument to ensure all appropriate smaller geographical units are included in the \emph{LEx/Is} computation. +#' +#' @return An object of class 'list'. This is a named list with the following components: +#' +#' \describe{ +#' \item{\code{lexis}}{An object of class 'tbl' for the GEOID, name, and \emph{LEx/Is} at specified smaller census geographies.} +#' \item{\code{lexis_data}}{An object of class 'tbl' for the raw census values at specified smaller census geographies.} +#' \item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute \emph{LEx/Is}.} +#' } +#' +#' @import dplyr +#' @importFrom car logit +#' @importFrom sf st_drop_geometry st_within +#' @importFrom stats complete.cases +#' @importFrom tidycensus get_acs +#' @importFrom tidyr pivot_longer separate +#' @importFrom tigris combined_statistical_areas core_based_statistical_areas metro_divisions +#' @importFrom utils stack +#' @export +#' +#' @seealso \code{\link[tidycensus]{get_acs}} for additional arguments for geographic extent selection (i.e., \code{state} and \code{county}).
+#' +#' @examples +#' \dontrun{ +#' # Wrapped in \dontrun{} because these examples require a Census API key. +#' +#' # Local Exposure and Isolation of non-Hispanic Black vs. non-Hispanic white populations +#' ## of census tracts within Georgia, U.S.A., counties (2020) +#' bemanian_beyer( +#' geo_large = 'county', +#' geo_small = 'tract', +#' state = 'GA', +#' year = 2020, +#' subgroup = 'NHoLB', +#' subgroup_ixn = 'NHoLW' +#' ) +#' +#' } +#' +bemanian_beyer <- function(geo_large = 'county', + geo_small = 'tract', + year = 2020, + subgroup, + subgroup_ixn, + omit_NAs = TRUE, + quiet = FALSE, + ...) { + + # Check arguments (match.arg() is called for validation only; its return value is discarded) + match.arg(geo_large, choices = c('state', 'county', 'tract', 'cbsa', 'csa', 'metro')) + match.arg(geo_small, choices = c('county', 'tract', 'block group')) + stopifnot(is.numeric(year), year >= 2009) # all variables available 2009 onward + match.arg( + subgroup, + several.ok = TRUE, + choices = c( + 'NHoL', + 'NHoLW', + 'NHoLB', + 'NHoLAIAN', + 'NHoLA', + 'NHoLNHOPI', + 'NHoLSOR', + 'NHoLTOMR', + 'NHoLTRiSOR', + 'NHoLTReSOR', + 'HoL', + 'HoLW', + 'HoLB', + 'HoLAIAN', + 'HoLA', + 'HoLNHOPI', + 'HoLSOR', + 'HoLTOMR', + 'HoLTRiSOR', + 'HoLTReSOR' + ) + ) + match.arg( + subgroup_ixn, + several.ok = TRUE, + choices = c( + 'NHoL', + 'NHoLW', + 'NHoLB', + 'NHoLAIAN', + 'NHoLA', + 'NHoLNHOPI', + 'NHoLSOR', + 'NHoLTOMR', + 'NHoLTRiSOR', + 'NHoLTReSOR', + 'HoL', + 'HoLW', + 'HoLB', + 'HoLAIAN', + 'HoLA', + 'HoLNHOPI', + 'HoLSOR', + 'HoLTOMR', + 'HoLTRiSOR', + 'HoLTReSOR' + ) + ) + + # Select census variables + vars <- c( + TotalPop = 'B03002_001', + NHoL = 'B03002_002', + NHoLW = 'B03002_003', + NHoLB = 'B03002_004', + NHoLAIAN = 'B03002_005', + NHoLA = 'B03002_006', + NHoLNHOPI = 'B03002_007', + NHoLSOR = 'B03002_008', + NHoLTOMR = 'B03002_009', + NHoLTRiSOR = 'B03002_010', + NHoLTReSOR = 'B03002_011', + HoL = 'B03002_012', + HoLW = 'B03002_013', + HoLB = 'B03002_014', + HoLAIAN = 'B03002_015', + HoLA = 'B03002_016', + HoLNHOPI =
'B03002_017', + HoLSOR = 'B03002_018', + HoLTOMR = 'B03002_019', + HoLTRiSOR = 'B03002_020', + HoLTReSOR = 'B03002_021' + ) + + selected_vars <- vars[c('TotalPop', subgroup, subgroup_ixn)] + out_names <- names(selected_vars) # save for output + in_subgroup <- paste0(subgroup, 'E') + in_subgroup_ixn <- paste0(subgroup_ixn, 'E') + + # Acquire LEx/Is variables and sf geometries + out_dat <- suppressMessages(suppressWarnings( + tidycensus::get_acs( + geography = geo_small, + year = year, + output = 'wide', + variables = selected_vars, + geometry = TRUE, + keep_geo_vars = TRUE, + ... + ) + )) + + # Format output + if (geo_small == 'county') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('county', 'state'), sep = ',') + } + if (geo_small == 'tract') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('tract', 'county', 'state'), sep = ',') %>% + dplyr::mutate(tract = gsub('[^0-9\\.]', '', tract)) + } + if (geo_small == 'block group') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('block.group', 'tract', 'county', 'state'), sep = ',') %>% + dplyr::mutate( + tract = gsub('[^0-9\\.]', '', tract), + block.group = gsub('[^0-9\\.]', '', block.group) + ) + } + + # Grouping IDs for LEx/Is computation + if (geo_large == 'state') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = STATEFP, + state = stringr::str_trim(state) + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'county') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = paste(STATEFP, COUNTYFP, sep = ''), + state = stringr::str_trim(state), + county = stringr::str_trim(county) + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'tract') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = paste(STATEFP, COUNTYFP, TRACTCE, sep = ''), + state = stringr::str_trim(state), + county = stringr::str_trim(county) + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'cbsa') { + stopifnot(is.numeric(year), year >= 2010) # CBSAs only available 2010 onward; NOTE(review): lgeom columns are selected by position here and in the csa/metro branches - confirm the tigris column order is stable across years + lgeom <-
suppressMessages(suppressWarnings(tigris::core_based_statistical_areas(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 3] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + cbsa = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 4] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'csa') { + stopifnot(is.numeric(year), year >= 2011) # CSAs only available 2011 onward + lgeom <- suppressMessages(suppressWarnings(tigris::combined_statistical_areas(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 2] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + csa = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 3] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'metro') { + stopifnot(is.numeric(year), year >= 2011) # Metro Divisions only available 2011 onward + lgeom <- suppressMessages(suppressWarnings(tigris::metro_divisions(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 4] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + metro = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 5] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) %>% + sf::st_drop_geometry() + } + + # Count of racial/ethnic subgroup populations + ## Count of racial/ethnic comparison subgroup population + if (length(in_subgroup) == 1) { +
out_dat <- out_dat %>% + dplyr::mutate(subgroup = .[, in_subgroup]) + } else { + out_dat <- out_dat %>% + dplyr::mutate(subgroup = rowSums(.[, in_subgroup])) + } + ## Count of racial/ethnic interaction subgroup population + if (length(in_subgroup_ixn) == 1) { + out_dat <- out_dat %>% + dplyr::mutate(subgroup_ixn = .[, in_subgroup_ixn]) + } else { + out_dat <- out_dat %>% + dplyr::mutate(subgroup_ixn = rowSums(.[, in_subgroup_ixn])) + } + + # Compute LEx/Is + ## From Bemanian & Beyer (2017) https://doi.org/10.1158/1055-9965.EPI-16-0926 + ## E^*_{m,n}(i) = log\left(\frac{p_{im} \times p_{in}}{1 - p_{im} \times p_{in}}\right) - log\left(\frac{P_{m} \times P_{n}}{1 - P_{m} \times P_{n}}\right) + ## Where for smaller geographical unit i: + ## p_{im} denotes the proportion of subgroup population m in smaller geographical unit i + ## p_{in} denotes the proportion of subgroup population n in smaller geographical unit i + ## P_{m} denotes the proportion of subgroup population m in larger geographical unit within which the smaller geographic unit i is located + ## P_{n} denotes the proportion of subgroup population n in larger geographical unit within which the smaller geographic unit i is located + ## If m \ne n, then computes the exposure of members of subgroup populations m and n + ## If m = n, then computes the simple isolation experienced by members of subgroup population m + + ## Compute + out_tmp <- out_dat %>% + split(., f = list(out_dat$oid)) %>% + lapply(., FUN = lexis_fun, omit_NAs = omit_NAs) %>% + do.call('rbind', .)
+ + # Summarize missingness of census characteristics (count and percent missing per variable) + missingYN <- out_dat[, c('TotalPopE', in_subgroup, in_subgroup_ixn)] + names(missingYN) <- out_names + missingYN <- missingYN %>% + tidyr::pivot_longer( + cols = dplyr::everything(), + names_to = 'variable', + values_to = 'val' + ) %>% + dplyr::group_by(variable) %>% + dplyr::summarise( + total = dplyr::n(), + n_missing = sum(is.na(val)), + percent_missing = paste0(round(mean(is.na(val)) * 100, 2), ' %') + ) + + if (quiet == FALSE) { + # Warning for missing census data + if (sum(missingYN$n_missing) > 0) { + message('Warning: Missing census data') + } + } + + # Format output (NOTE(review): geo_small cannot be 'state' given the match.arg() choices above, so the first branch below looks unreachable - confirm) + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(GEOID)) + + if (geo_small == 'state') { + out <- out %>% + dplyr::select(GEOID, state, LExIs) + } + if (geo_small == 'county') { + out <- out %>% + dplyr::select(GEOID, state, county, LExIs) + } + if (geo_small == 'tract') { + out <- out %>% + dplyr::select(GEOID, state, county, tract, LExIs) + } + if (geo_small == 'block group') { + out <- out %>% + dplyr::select(GEOID, state, county, tract, block.group, LExIs) + } + if (geo_large == 'cbsa') { + out <- out_dat %>% + dplyr::select(GEOID, cbsa) %>% + dplyr::left_join(out, ., by = dplyr::join_by(GEOID)) %>% + dplyr::relocate(cbsa, .after = county) + } + if (geo_large == 'csa') { + out <- out_dat %>% + dplyr::select(GEOID, csa) %>% + dplyr::left_join(out, ., by = dplyr::join_by(GEOID)) %>% + dplyr::relocate(csa, .after = county) + } + if (geo_large == 'metro') { + out <- out_dat %>% + dplyr::select(GEOID, metro) %>% + dplyr::left_join(out, ., by = dplyr::join_by(GEOID)) %>% + dplyr::relocate(metro, .after = county) + } + + out <- out %>% + unique(.)
%>% + .[.$GEOID != 'NANA',] %>% + dplyr::arrange(GEOID) %>% + dplyr::as_tibble() + + out_dat <- out_dat %>% + dplyr::arrange(GEOID) %>% + dplyr::as_tibble() + + out <- list(lexis = out, lexis_data = out_dat, missing = missingYN) + + return(out) + } diff --git a/R/bravo.R b/R/bravo.R index ce8499a..c20bdea 100644 --- a/R/bravo.R +++ b/R/bravo.R @@ -1,36 +1,36 @@ #' Educational Isolation Index based on Bravo et al. (2021) -#' +#' #' Compute the spatial Educational Isolation Index (Bravo) of selected educational attainment category(ies). #' -#' @param geo Character string specifying the geography of the data either census tracts \code{geo = "tract"} (the default) or counties \code{geo = "county"}. +#' @param geo Character string specifying the geography of the data either census tracts \code{geo = 'tract'} (the default) or counties \code{geo = 'county'}. #' @param year Numeric. The year to compute the estimate. The default is 2020, and the years 2009 onward are currently available. #' @param subgroup Character string specifying the educational attainment category(ies). See Details for available choices. #' @param quiet Logical. If TRUE, will display messages about potential missing census information. The default is FALSE. #' @param ... Arguments passed to \code{\link[tidycensus]{get_acs}} to select state, county, and other arguments for census characteristics #' -#' @details This function will compute the spatial Educational Isolation Index (EI) of U.S. census tracts or counties for a specified geographical extent (e.g., the entire U.S. or a single state) based on Bravo et al. (2021) \doi{10.3390/ijerph18179384} who originally designed the metric for the educational isolation of individual without a college degree. This function provides the computation of EI for any of the U.S. Census Bureau educational attainment levels. -#' -#' The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S.
Census Bureau 5-year American Community Survey characteristics used for the geospatial computation. The yearly estimates are available for 2009 onward when ACS-5 data are available but are available from other U.S. Census Bureau surveys. The five educational attainment levels (U.S. Census Bureau definitions) are: +#' @details This function will compute the spatial Educational Isolation Index (\emph{EI}) of U.S. census tracts or counties for a specified geographical extent (e.g., the entire U.S. or a single state) based on Bravo et al. (2021) \doi{10.3390/ijerph18179384} who originally designed the metric for the educational isolation of individuals without a college degree. This function provides the computation of \emph{EI} for any of the U.S. Census Bureau educational attainment levels. +#' +#' The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the geospatial computation. The yearly estimates are available for 2009 onward when ACS-5 data are available but may be available from other U.S. Census Bureau surveys. The five educational attainment levels (U.S.
Census Bureau definitions) are: #' \itemize{ -#' \item{B06009_002: }{Less than high school graduate "LtHS"} -#' \item{B06009_003: }{High school graduate (includes equivalency) "HSGiE"} -#' \item{B06009_004: }{Some college or associate's degree "SCoAD"} -#' \item{B06009_005: }{Bachelor's degree "BD"} -#' \item{B06009_006: }{Graduate or professional degree "GoPD"} +#' \item \strong{B06009_002}: Less than high school graduate \code{'LtHS'} +#' \item \strong{B06009_003}: High school graduate (includes equivalency) \code{'HSGiE'} +#' \item \strong{B06009_004}: Some college or associate's degree \code{'SCoAD'} +#' \item \strong{B06009_005}: Bachelor's degree \code{'BD'} +#' \item \strong{B06009_006}: Graduate or professional degree \code{'GoPD'} #' } -#' Note: If \code{year = 2009}, then the ACS-5 data (2005-2009) are from the "B15002" question. -#' -#' Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. NOTE: Current version does not correct for edge effects (e.g., census geographies along the specified spatial extent border, coastline, or U.S.-Mexico / U.S.-Canada border) may have few neighboring census geographies, and EI values in these census geographies may be unstable. A stop-gap solution for the former source of edge effect is to compute the EI for neighboring census geographies (i.e., the states bordering a study area of interest) and then use the estimates of the study area of interest. -#' -#' A census geography (and its neighbors) that has nearly all of its population with the specified educational attainment category (e.g., a Bachelor's degree or more) will have an EI value close to 1. In contrast, a census geography (and its neighbors) that is nearly none of its population with the specified educational attainment category (e.g., less than a Bachelor's degree) will have an EI value close to 0. 
-#' +#' Note: If \code{year = 2009}, then the ACS-5 data (2005-2009) are from the \strong{B15002} question. +#' +#' Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. NOTE: Current version does not correct for edge effects; census geographies along the specified spatial extent border, coastline, or U.S.-Mexico / U.S.-Canada border may have few neighboring census geographies, and \emph{EI} values in these census geographies may be unstable. A stop-gap solution for the former source of edge effect is to compute the \emph{EI} for neighboring census geographies (i.e., the states bordering a study area of interest) and then use the estimates of the study area of interest. +#' +#' A census geography (and its neighbors) that has nearly all of its population with the specified educational attainment category (e.g., a Bachelor's degree or more) will have an \emph{EI} value close to 1. In contrast, a census geography (and its neighbors) that has nearly none of its population with the specified educational attainment category (e.g., less than a Bachelor's degree) will have an \emph{EI} value close to 0. +#' #' @return An object of class 'list'.
This is a named list with the following components: -#' +#' #' \describe{ -#' \item{\code{ei}}{An object of class 'tbl' for the GEOID, name, EI, and raw census values of specified census geographies.} -#' \item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute EI.} +#' \item{\code{ei}}{An object of class 'tbl' for the GEOID, name, \emph{EI}, and raw census values of specified census geographies.} +#' \item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute \emph{EI}.} #' } -#' +#' #' @import dplyr #' @importFrom Matrix sparseMatrix #' @importFrom sf st_drop_geometry st_geometry st_intersects @@ -39,180 +39,283 @@ #' @importFrom tidycensus get_acs #' @importFrom tidyr pivot_longer separate #' @export -#' +#' #' @seealso \code{\link[tidycensus]{get_acs}} for additional arguments for geographic extent selection (i.e., \code{state} and \code{county}). #' #' @examples #' \dontrun{ #' # Wrapped in \dontrun{} because these examples require a Census API key. -#' -#' # Tract-level metric (2020) -#' bravo(geo = "tract", state = "GA", year = 2020, subgroup = c("LtHS", "HSGiE")) -#' -#' # County-level metric (2020) -#' bravo(geo = "county", state = "GA", year = 2020, subgroup = c("LtHS", "HSGiE")) -#' +#' +#' # Educational Isolation Index of less than some college or associate's degree attainment +#' ## of census tracts within Georgia, U.S.A., counties (2020) +#' bravo( +#' geo = 'tract', +#' state = 'GA', +#' year = 2020, +#' subgroup = c('LtHS', 'HSGiE') +#' ) +#' #' } -#' -bravo <- function(geo = "tract", year = 2020, subgroup, quiet = FALSE, ...) 
{ - - # Check arguments - match.arg(geo, choices = c("county", "tract")) - stopifnot(is.numeric(year), year >= 2009) # all variables available 2009 onward - match.arg(subgroup, several.ok = TRUE, - choices = c("LtHS", "HSGiE", "SCoAD", "BD", "GoPD")) - - # Select census variables - vars <- c(TotalPop = "B06009_001", - LtHS = "B06009_002", - HSGiE = "B06009_003", - SCoAD = "B06009_004", - BD = "B06009_005", - GoPD = "B06009_006") - - selected_vars <- vars[c("TotalPop", subgroup)] +#' +bravo <- function(geo = 'tract', + year = 2020, + subgroup, + quiet = FALSE, + ...) { - if (year == 2009) { - vars <- matrix(c("TotalPop", "TotalPop", "B15002_001", - "LtHS", "mNSC", "B15002_003", - "LtHS", "mNt4G", "B15002_004", - "LtHS", "m5t6G", "B15002_005", - "LtHS", "m7t8G", "B15002_006", - "LtHS", "m9G", "B15002_007", - "LtHS", "m10G", "B15002_008", - "LtHS", "m11G", "B15002_009", - "LtHS", "m12GND", "B15002_010", - "HSGiE", "mHSGGEDoA", "B15002_011", - "SCoAD", "mSClt1Y", "B15002_012", - "SCoAD", "mSC1oMYND", "B15002_013", - "SCoAD", "mAD", "B15002_014", - "BD", "mBD", "B15002_015", - "GoPD", "mMD", "B15002_016", - "GoPD", "mPSD", "B15002_017", - "GoPD", "mDD", "B15002_018", - "LtHS", "fNSC", "B15002_020", - "LtHS", "fNt4G", "B15002_021", - "LtHS", "f5t6G", "B15002_022", - "LtHS", "f7t8G", "B15002_023", - "LtHS", "f9G", "B15002_024", - "LtHS", "f10G", "B15002_025", - "LtHS", "f11G", "B15002_026", - "LtHS", "f12GND", "B15002_027", - "HSGiE", "fHSGGEDoA", "B15002_028", - "SCoAD", "fSClt1Y", "B15002_029", - "SCoAD", "fSC1oMYND", "B15002_030", - "SCoAD", "fAD", "B15002_031", - "BD", "fBD", "B15002_032", - "GoPD", "fMD", "B15002_033", - "GoPD", "fPSD", "B15002_034", - "GoPD", "fDD", "B15002_035"), nrow = 33, ncol = 3, byrow = TRUE) + # Check arguments + match.arg(geo, choices = c('county', 'tract')) + stopifnot(is.numeric(year), year >= 2009) # all variables available 2009 onward + match.arg( + subgroup, + several.ok = TRUE, + choices = c('LtHS', 'HSGiE', 'SCoAD', 'BD', 'GoPD') + )
- selected_vars <- stats::setNames(vars[ vars[ , 1] %in% c("TotalPop", subgroup) , 3], - vars[ vars[ , 1] %in% c("TotalPop", subgroup) , 2]) - } - - out_names <- names(selected_vars) # save for output - prefix <- "subgroup" - suffix <- seq(1:length(selected_vars[-1])) - names(selected_vars) <- c("TotalPop", paste(prefix, suffix, sep = "")) - in_names <- paste(names(selected_vars), "E", sep = "") - - # Acquire EI variables and sf geometries - ei_vars <- suppressMessages(suppressWarnings(tidycensus::get_acs(geography = geo, - year = year, - output = "wide", - variables = selected_vars, - geometry = TRUE, ...))) - - if (geo == "tract") { - ei_vars <- ei_vars %>% - tidyr::separate(NAME, into = c("tract", "county", "state"), sep = ",") %>% - dplyr::mutate(tract = gsub("[^0-9\\.]","", tract)) - } else { - ei_vars <- ei_vars %>% tidyr::separate(NAME, into = c("county", "state"), sep = ",") - } - - ei_vars <- ei_vars %>% - dplyr::mutate(subgroup = rowSums(sf::st_drop_geometry(ei_vars[ , in_names[-1]]))) - - # Compute EI - ## From Bravo et al. (2021) https://doi.org/10.3390/ijerph18179384 - ## EI_{im} = (Sigma_{j∈∂_{i}} w_{ij} * T_{jm}) / (Sigma_{j∈∂_{i}} w_{ij} * T_{j}) - ## Where: - ## ∂_{i} denotes the set of index units i and its neighbors - ## Given M mutually exclusive subgroups of educational attainment categories, m indexes the subgroups of M - ## T_{i} denotes the total population in region i (TotalPop) - ## T_{im} denotes the population of the selected subgroup(s) (subgroup1, ...)
- ## w_{ij} denotes a nXn first-order adjacency matrix, where n is the number of census geometries in the study area - ### and the entries of w_{ij} are set to 1 if a boundary is shared by region i and region j and zero otherwise - ### Entries of the main diagonal (since i∈∂_{i}, w_{ij} = w_{ii} when j = i) of w_{ij} are set to 1.5 - ### such that the weight of the index unit, i, is larger than the weights assigned to adjacent tracts - - ## Geospatial adjacency matrix (wij) - tmp <- sf::st_intersects(sf::st_geometry(ei_vars), sparse = TRUE) - names(tmp) <- as.character(seq_len(nrow(ei_vars))) - tmpL <- length(tmp) - tmpcounts <- unlist(Map(length, tmp)) - tmpi <- rep(1:tmpL, tmpcounts) - tmpj <- unlist(tmp) - wij <- Matrix::sparseMatrix(i = tmpi, j = tmpj, x = 1, dims = c(tmpL, tmpL)) - diag(wij) <- 1.5 - - ## Compute - ei_vars <- sf::st_drop_geometry(ei_vars) # drop geometries (can join back later) - EIim <- list() - for (i in 1:dim(wij)[1]){ - EIim[[i]] <- sum(as.matrix(wij[i, ])*ei_vars[ , "subgroup"]) / sum(as.matrix(wij[i, ])*ei_vars[, "TotalPopE"]) - } - ei_vars$EI <- unlist(EIim) - - # Warning for missingness of census characteristics - missingYN <- ei_vars[ , in_names] - names(missingYN) <- out_names - missingYN <- missingYN %>% - tidyr::pivot_longer(cols = dplyr::everything(), - names_to = "variable", - values_to = "val") %>% - dplyr::group_by(variable) %>% - dplyr::summarise(total = dplyr::n(), - n_missing = sum(is.na(val)), - percent_missing = paste0(round(mean(is.na(val)) * 100, 2), " %")) - - if (quiet == FALSE) { - # Warning for missing census data - if (sum(missingYN$n_missing) > 0) { - message("Warning: Missing census data") + # Select census variables + vars <- c( + TotalPop = 'B06009_001', + LtHS = 'B06009_002', + HSGiE = 'B06009_003', + SCoAD = 'B06009_004', + BD = 'B06009_005', + GoPD = 'B06009_006' + ) + + selected_vars <- vars[c('TotalPop', subgroup)] + + if (year == 2009) { + vars <- matrix( + c( + 'TotalPop', + 'TotalPop', + 'B15002_001', +
'LtHS', + 'mNSC', + 'B15002_003', + 'LtHS', + 'mNt4G', + 'B15002_004', + 'LtHS', + 'm5t6G', + 'B15002_005', + 'LtHS', + 'm7t8G', + 'B15002_006', + 'LtHS', + 'm9G', + 'B15002_007', + 'LtHS', + 'm10G', + 'B15002_008', + 'LtHS', + 'm11G', + 'B15002_009', + 'LtHS', + 'm12GND', + 'B15002_010', + 'HSGiE', + 'mHSGGEDoA', + 'B15002_011', + 'SCoAD', + 'mSClt1Y', + 'B15002_012', + 'SCoAD', + 'mSC1oMYND', + 'B15002_013', + 'SCoAD', + 'mAD', + 'B15002_014', + 'BD', + 'mBD', + 'B15002_015', + 'GoPD', + 'mMD', + 'B15002_016', + 'GoPD', + 'mPSD', + 'B15002_017', + 'GoPD', + 'mDD', + 'B15002_018', + 'LtHS', + 'fNSC', + 'B15002_020', + 'LtHS', + 'fNt4G', + 'B15002_021', + 'LtHS', + 'f5t6G', + 'B15002_022', + 'LtHS', + 'f7t8G', + 'B15002_023', + 'LtHS', + 'f9G', + 'B15002_024', + 'LtHS', + 'f10G', + 'B15002_025', + 'LtHS', + 'f11G', + 'B15002_026', + 'LtHS', + 'f12GND', + 'B15002_027', + 'HSGiE', + 'fHSGGEDoA', + 'B15002_028', + 'SCoAD', + 'fSClt1Y', + 'B15002_029', + 'SCoAD', + 'fSC1oMYND', + 'B15002_030', + 'SCoAD', + 'fAD', + 'B15002_031', + 'BD', + 'fBD', + 'B15002_032', + 'GoPD', + 'fMD', + 'B15002_033', + 'GoPD', + 'fPSD', + 'B15002_034', + 'GoPD', + 'fDD', + 'B15002_035' + ), + nrow = 33, + ncol = 3, + byrow = TRUE + ) + + selected_vars <- stats::setNames( + vars[vars[, 1] %in% c('TotalPop', subgroup) , 3], + vars[vars[, 1] %in% c('TotalPop', subgroup) , 2] + ) } + + out_names <- names(selected_vars) # save for output + prefix <- 'subgroup' + suffix <- seq(1:length(selected_vars[-1])) + names(selected_vars) <- c('TotalPop', paste0(prefix, suffix)) + in_names <- paste0(names(selected_vars), 'E') + + # Acquire EI variables and sf geometries + out_dat <- suppressMessages(suppressWarnings( + tidycensus::get_acs( + geography = geo, + year = year, + output = 'wide', + variables = selected_vars, + geometry = TRUE, + ...
+ ) + )) + + if (geo == 'tract') { + out_dat <- out_dat %>% + tidyr::separate(NAME, into = c('tract', 'county', 'state'), sep = ',') %>% + dplyr::mutate(tract = gsub('[^0-9\\.]', '', tract)) + } else { + out_dat <- out_dat %>% + tidyr::separate(NAME, into = c('county', 'state'), sep = ',') + } + + out_dat <- out_dat %>% + dplyr::mutate(subgroup = rowSums(sf::st_drop_geometry(out_dat[, in_names[-1]]))) + + # Compute EI + ## From Bravo et al. (2021) https://doi.org/10.3390/ijerph18179384 + ## EI_{im} = (Sigma_{j∈∂_{i}} w_{ij} * T_{jm}) / (Sigma_{j∈∂_{i}} w_{ij} * T_{j}) + ## Where: + ## ∂_{i} denotes the set of index units i and its neighbors + ## Given M mutually exclusive subgroups of educational attainment categories, m indexes the subgroups of M + ## T_{i} denotes the total population in region i (TotalPop) + ## T_{im} denotes the population of the selected subgroup(s) (subgroup1, ...) + ## w_{ij} denotes a nXn first-order adjacency matrix, where n is the number of census geometries in the study area + ### and the entries of w_{ij} are set to 1 if a boundary is shared by region i and region j and zero otherwise + ### Entries of the main diagonal (since i∈∂_{i}, w_{ij} = w_{ii} when j = i) of w_{ij} are set to 1.5 + ### such that the weight of the index unit, i, is larger than the weights assigned to adjacent tracts + + ## Geospatial adjacency matrix (w_ij) + tmp <- sf::st_intersects(sf::st_geometry(out_dat), sparse = TRUE) + names(tmp) <- as.character(seq_len(nrow(out_dat))) + tmp_L <- length(tmp) + tmp_counts <- unlist(Map(length, tmp)) + tmp_i <- rep(1:tmp_L, tmp_counts) + tmp_j <- unlist(tmp) + w_ij <- Matrix::sparseMatrix( + i = tmp_i, + j = tmp_j, + x = 1, + dims = c(tmp_L, tmp_L) + ) + diag(w_ij) <- 1.5 + + ## Compute (NOTE(review): sum() is called without na.rm and 0 * NA is NA in R, so a missing count in any row of out_dat - not only in a neighbor - would presumably make EI NA; confirm this is intended) + out_dat <- out_dat %>% + sf::st_drop_geometry() # drop geometries (can join back later) + out_tmp <- list() + for (i in 1:dim(w_ij)[1]) { + out_tmp[[i]] <- sum(as.matrix(w_ij[i,]) * out_dat[, 'subgroup']) / + sum(as.matrix(w_ij[i,]) *
out_dat[, 'TotalPopE']) + } + out_dat$EI <- unlist(out_tmp) + + # Summarize missingness of census characteristics (count and percent missing per variable) + missingYN <- out_dat[, in_names] + names(missingYN) <- out_names + missingYN <- missingYN %>% + tidyr::pivot_longer( + cols = dplyr::everything(), + names_to = 'variable', + values_to = 'val' + ) %>% + dplyr::group_by(variable) %>% + dplyr::summarise( + total = dplyr::n(), + n_missing = sum(is.na(val)), + percent_missing = paste0(round(mean(is.na(val)) * 100, 2), ' %') + ) + + if (quiet == FALSE) { + # Warning for missing census data + if (sum(missingYN$n_missing) > 0) { + message('Warning: Missing census data') + } + } + + # Format output + if (geo == 'tract') { + out <- out_dat %>% + dplyr::select(c( + 'GEOID', + 'state', + 'county', + 'tract', + 'EI', + dplyr::all_of(in_names) + )) + names(out) <- c('GEOID', 'state', 'county', 'tract', 'EI', out_names) + } else { + out <- out_dat %>% + dplyr::select(c('GEOID', 'state', 'county', 'EI', dplyr::all_of(in_names))) + names(out) <- c('GEOID', 'state', 'county', 'EI', out_names) + } + + out <- out %>% + dplyr::mutate( + state = stringr::str_trim(state), + county = stringr::str_trim(county) + ) %>% + dplyr::arrange(GEOID) %>% + dplyr::as_tibble() + + out <- list(ei = out, missing = missingYN) + + return(out) } - - # Format output - if (geo == "tract") { - ei <- ei_vars %>% - dplyr::select(c("GEOID", - "state", - "county", - "tract", - "EI", - dplyr::all_of(in_names))) - names(ei) <- c("GEOID", "state", "county", "tract", "EI", out_names) - } else { - ei <- ei_vars %>% - dplyr::select(c("GEOID", - "state", - "county", - "EI", - dplyr::all_of(in_names))) - names(ei) <- c("GEOID", "state", "county", "EI", out_names) - } - - ei <- ei %>% - dplyr::mutate(state = stringr::str_trim(state), - county = stringr::str_trim(county)) %>% - dplyr::arrange(GEOID) %>% - dplyr::as_tibble() - - out <- list(ei = ei, - missing = missingYN) - - return(out) -} diff --git a/R/duncan.R b/R/duncan.R index 1ca1bbe..7f7a563 100644
--- a/R/duncan.R +++ b/R/duncan.R @@ -1,9 +1,9 @@ -#' Dissimilarity Index based on Duncan & Duncan (1955) -#' -#' Compute the aspatial Dissimilarity Index (Duncan) of selected racial/ethnic subgroup(s) and U.S. geographies +#' Dissimilarity Index based on Duncan & Duncan (1955) +#' +#' Compute the aspatial Dissimilarity Index (Duncan & Duncan) of selected racial/ethnic subgroup(s) and U.S. geographies #' -#' @param geo_large Character string specifying the larger geographical unit of the data. The default is counties \code{geo_large = "county"}. -#' @param geo_small Character string specifying the smaller geographical unit of the data. The default is census tracts \code{geo_large = "tract"}. +#' @param geo_large Character string specifying the larger geographical unit of the data. The default is counties \code{geo_large = 'county'}. +#' @param geo_small Character string specifying the smaller geographical unit of the data. The default is census tracts \code{geo_small = 'tract'}. #' @param year Numeric. The year to compute the estimate. The default is 2020, and the years 2009 onward are currently available. #' @param subgroup Character string specifying the racial/ethnic subgroup(s) as the comparison population. See Details for available choices. #' @param subgroup_ref Character string specifying the racial/ethnic subgroup(s) as the reference population. See Details for available choices. @@ -11,248 +11,419 @@ #' @param quiet Logical. If TRUE, will display messages about potential missing census information. The default is FALSE. #' @param ... Arguments passed to \code{\link[tidycensus]{get_acs}} to select state, county, and other arguments for census characteristics #' -#' @details This function will compute the aspatial Dissimilarity Index (DI) of selected racial/ethnic subgroups and U.S. geographies for a specified geographical extent (e.g., the entire U.S. or a single state) based on Duncan & Duncan (1955) \doi{10.2307/2088328}.
This function provides the computation of DI for any of the U.S. Census Bureau race/ethnicity subgroups (including Hispanic and non-Hispanic individuals). -#' -#' The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the aspatial computation. The yearly estimates are available for 2009 onward when ACS-5 data are available but are available from other U.S. Census Bureau surveys. The twenty racial/ethnic subgroups (U.S. Census Bureau definitions) are: +#' @details This function will compute the aspatial Dissimilarity Index (\emph{D}) of selected racial/ethnic subgroups and U.S. geographies for a specified geographical extent (e.g., the entire U.S. or a single state) based on Duncan & Duncan (1955) \doi{10.2307/2088328}. This function provides the computation of \emph{D} for any of the U.S. Census Bureau race/ethnicity subgroups (including Hispanic and non-Hispanic individuals). +#' +#' The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the aspatial computation. The yearly estimates are available for 2009 onward when ACS-5 data are available (2010 onward for \code{geo_large = 'cbsa'} and 2011 onward for \code{geo_large = 'csa'} or \code{geo_large = 'metro'}) but may be available from other U.S. Census Bureau surveys. The twenty racial/ethnic subgroups (U.S. 
Census Bureau definitions) are: #' \itemize{ -#' \item{B03002_002: }{not Hispanic or Latino "NHoL"} -#' \item{B03002_003: }{not Hispanic or Latino, white alone "NHoLW"} -#' \item{B03002_004: }{not Hispanic or Latino, Black or African American alone "NHoLB"} -#' \item{B03002_005: }{not Hispanic or Latino, American Indian and Alaska Native alone "NHoLAIAN"} -#' \item{B03002_006: }{not Hispanic or Latino, Asian alone "NHoLA"} -#' \item{B03002_007: }{not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone "NHoLNHOPI"} -#' \item{B03002_008: }{not Hispanic or Latino, Some other race alone "NHoLSOR"} -#' \item{B03002_009: }{not Hispanic or Latino, Two or more races "NHoLTOMR"} -#' \item{B03002_010: }{not Hispanic or Latino, Two races including Some other race "NHoLTRiSOR"} -#' \item{B03002_011: }{not Hispanic or Latino, Two races excluding Some other race, and three or more races "NHoLTReSOR"} -#' \item{B03002_012: }{Hispanic or Latino "HoL"} -#' \item{B03002_013: }{Hispanic or Latino, white alone "HoLW"} -#' \item{B03002_014: }{Hispanic or Latino, Black or African American alone "HoLB"} -#' \item{B03002_015: }{Hispanic or Latino, American Indian and Alaska Native alone "HoLAIAN"} -#' \item{B03002_016: }{Hispanic or Latino, Asian alone "HoLA"} -#' \item{B03002_017: }{Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone "HoLNHOPI"} -#' \item{B03002_018: }{Hispanic or Latino, Some other race alone "HoLSOR"} -#' \item{B03002_019: }{Hispanic or Latino, Two or more races "HoLTOMR"} -#' \item{B03002_020: }{Hispanic or Latino, Two races including Some other race "HoLTRiSOR"} -#' \item{B03002_021: }{Hispanic or Latino, Two races excluding Some other race, and three or more races "HoLTReSOR"} +#' \item \strong{B03002_002}: not Hispanic or Latino \code{'NHoL'} +#' \item \strong{B03002_003}: not Hispanic or Latino, white alone \code{'NHoLW'} +#' \item \strong{B03002_004}: not Hispanic or Latino, Black or African American alone \code{'NHoLB'} +#' 
\item \strong{B03002_005}: not Hispanic or Latino, American Indian and Alaska Native alone \code{'NHoLAIAN'} +#' \item \strong{B03002_006}: not Hispanic or Latino, Asian alone \code{'NHoLA'} +#' \item \strong{B03002_007}: not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'NHoLNHOPI'} +#' \item \strong{B03002_008}: not Hispanic or Latino, Some other race alone \code{'NHoLSOR'} +#' \item \strong{B03002_009}: not Hispanic or Latino, Two or more races \code{'NHoLTOMR'} +#' \item \strong{B03002_010}: not Hispanic or Latino, Two races including Some other race \code{'NHoLTRiSOR'} +#' \item \strong{B03002_011}: not Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'NHoLTReSOR'} +#' \item \strong{B03002_012}: Hispanic or Latino \code{'HoL'} +#' \item \strong{B03002_013}: Hispanic or Latino, white alone \code{'HoLW'} +#' \item \strong{B03002_014}: Hispanic or Latino, Black or African American alone \code{'HoLB'} +#' \item \strong{B03002_015}: Hispanic or Latino, American Indian and Alaska Native alone \code{'HoLAIAN'} +#' \item \strong{B03002_016}: Hispanic or Latino, Asian alone \code{'HoLA'} +#' \item \strong{B03002_017}: Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'HoLNHOPI'} +#' \item \strong{B03002_018}: Hispanic or Latino, Some other race alone \code{'HoLSOR'} +#' \item \strong{B03002_019}: Hispanic or Latino, Two or more races \code{'HoLTOMR'} +#' \item \strong{B03002_020}: Hispanic or Latino, Two races including Some other race \code{'HoLTRiSOR'} +#' \item \strong{B03002_021}: Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'HoLTReSOR'} #' } -#' +#' #' Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. 
-#' -#' DI is a measure of the evenness of racial/ethnic residential segregation when comparing smaller geographical areas to larger ones within which the smaller geographical areas are located. DI can range in value from 0 to 1 and represents the proportion of racial/ethnic subgroup members that would have to change their area of residence to achieve an even distribution within the larger geographical area under conditions of maximum segregation. -#' -#' Larger geographies available include state \code{geo_large = "state"}, county \code{geo_large = "county"}, and census tract \code{geo_large = "tract"} levels. Smaller geographies available include, county \code{geo_small = "county"}, census tract \code{geo_small = "tract"}, and census block group \code{geo_small = "block group"} levels. If a larger geographical area is comprised of only one smaller geographical area (e.g., a U.S county contains only one census tract), then the DI value returned is NA. +#' +#' \emph{D} is a measure of the evenness of racial/ethnic residential segregation when comparing smaller geographical areas to larger ones within which the smaller geographical areas are located. \emph{D} can range in value from 0 to 1 and represents the proportion of racial/ethnic subgroup members that would have to change their area of residence to achieve an even distribution within the larger geographical area under conditions of maximum segregation. +#' +#' Larger geographies available include state \code{geo_large = 'state'}, county \code{geo_large = 'county'}, census tract \code{geo_large = 'tract'}, Core Based Statistical Area \code{geo_large = 'cbsa'}, Combined Statistical Area \code{geo_large = 'csa'}, and Metropolitan Division \code{geo_large = 'metro'} levels. Smaller geographies available include, county \code{geo_small = 'county'}, census tract \code{geo_small = 'tract'}, and census block group \code{geo_small = 'block group'} levels. 
If a larger geographical area is comprised of only one smaller geographical area (e.g., a U.S. county contains only one census tract), then the \emph{D} value returned is NA. If the larger geographical unit is Combined Statistical Areas \code{geo_large = 'csa'}, Core Based Statistical Areas \code{geo_large = 'cbsa'}, or Metropolitan Divisions \code{geo_large = 'metro'}, only the smaller geographical units completely within a larger geographical unit are considered in the \emph{D} computation (see internal \code{\link[sf]{st_within}} function for more information) and we recommend specifying all states within which the larger geographical unit of interest is located using the internal \code{state} argument to ensure all appropriate smaller geographical units are included in the \emph{D} computation. #' #' @return An object of class 'list'. This is a named list with the following components: -#' +#' #' \describe{ -#' \item{\code{di}}{An object of class 'tbl' for the GEOID, name, and DI at specified larger census geographies.} -#' \item{\code{di_data}}{An object of class 'tbl' for the raw census values at specified smaller census geographies.} -#' \item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute DI} +#' \item{\code{d}}{An object of class 'tbl' for the GEOID, name, and \emph{D} at specified larger census geographies.} +#' \item{\code{d_data}}{An object of class 'tbl' for the raw census values at specified smaller census geographies.} +#' \item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute \emph{D}.} #' } -#' +#' #' @import dplyr -#' @importFrom sf st_drop_geometry +#' @importFrom sf st_drop_geometry st_within #' @importFrom stats complete.cases #' @importFrom tidycensus get_acs #' @importFrom tidyr pivot_longer separate +#' @importFrom tigris combined_statistical_areas core_based_statistical_areas metro_divisions #' @importFrom utils stack #' @export -#' +#'
#' @seealso \code{\link[tidycensus]{get_acs}} for additional arguments for geographic extent selection (i.e., \code{state} and \code{county}). #' #' @examples #' \dontrun{ #' # Wrapped in \dontrun{} because these examples require a Census API key. -#' +#' #' # Dissimilarity Index of non-Hispanic Black vs. non-Hispanic white populations -#' ## of census tracts within Georgia, U.S.A., counties (2020) -#' duncan(geo_large = "county", geo_small = "tract", state = "GA", year = 2020, -#' subgroup = "NHoLB", subgroup_ref = "NHoLW") -#' +#' ## of census tracts within counties within Georgia, U.S.A., counties (2020) +#' duncan( +#' geo_large = 'county', +#' geo_small = 'tract', +#' state = 'GA', +#' year = 2020, +#' subgroup = 'NHoLB', +#' subgroup_ref = 'NHoLW' +#' ) +#' #' } -#' -duncan <- function(geo_large = "county", geo_small = "tract", year = 2020, subgroup, subgroup_ref, omit_NAs = TRUE, quiet = FALSE, ...) { - - # Check arguments - match.arg(geo_large, choices = c("state", "county", "tract")) - match.arg(geo_small, choices = c("county", "tract", "block group")) - stopifnot(is.numeric(year), year >= 2009) # all variables available 2009 onward - match.arg(subgroup, several.ok = TRUE, - choices = c("NHoL", "NHoLW", "NHoLB", "NHoLAIAN", "NHoLA", "NHoLNHOPI", - "NHoLSOR", "NHoLTOMR", "NHoLTRiSOR", "NHoLTReSOR", - "HoL", "HoLW", "HoLB", "HoLAIAN", "HoLA", "HoLNHOPI", - "HoLSOR", "HoLTOMR", "HoLTRiSOR", "HoLTReSOR")) - match.arg(subgroup_ref, several.ok = TRUE, - choices = c("NHoL", "NHoLW", "NHoLB", "NHoLAIAN", "NHoLA", "NHoLNHOPI", - "NHoLSOR", "NHoLTOMR", "NHoLTRiSOR", "NHoLTReSOR", - "HoL", "HoLW", "HoLB", "HoLAIAN", "HoLA", "HoLNHOPI", - "HoLSOR", "HoLTOMR", "HoLTRiSOR", "HoLTReSOR")) - - # Select census variables - vars <- c(NHoL = "B03002_002", - NHoLW = "B03002_003", - NHoLB = "B03002_004", - NHoLAIAN = "B03002_005", - NHoLA = "B03002_006", - NHoLNHOPI = "B03002_007", - NHoLSOR = "B03002_008", - NHoLTOMR = "B03002_009", - NHoLTRiSOR = "B03002_010", - NHoLTReSOR = 
"B03002_011", - HoL = "B03002_012", - HoLW = "B03002_013", - HoLB = "B03002_014", - HoLAIAN = "B03002_015", - HoLA = "B03002_016", - HoLNHOPI = "B03002_017", - HoLSOR = "B03002_018", - HoLTOMR = "B03002_019", - HoLTRiSOR = "B03002_020", - HoLTReSOR = "B03002_021") - - selected_vars <- vars[c(subgroup, subgroup_ref)] - out_names <- names(selected_vars) # save for output - in_subgroup <- paste(subgroup, "E", sep = "") - in_subgroup_ref <- paste(subgroup_ref, "E", sep = "") - - # Acquire DI variables and sf geometries - di_data <- suppressMessages(suppressWarnings(tidycensus::get_acs(geography = geo_small, - year = year, - output = "wide", - variables = selected_vars, - geometry = TRUE, - keep_geo_vars = TRUE, ...))) - - # Format output - if (geo_small == "county") { - di_data <- sf::st_drop_geometry(di_data) %>% - tidyr::separate(NAME.y, into = c("county", "state"), sep = ",") - } - if (geo_small == "tract") { - di_data <- sf::st_drop_geometry(di_data) %>% - tidyr::separate(NAME.y, into = c("tract", "county", "state"), sep = ",") %>% - dplyr::mutate(tract = gsub("[^0-9\\.]", "", tract)) - } - if (geo_small == "block group") { - di_data <- sf::st_drop_geometry(di_data) %>% - tidyr::separate(NAME.y, into = c("block.group", "tract", "county", "state"), sep = ",") %>% - dplyr::mutate(tract = gsub("[^0-9\\.]", "", tract), - block.group = gsub("[^0-9\\.]", "", block.group)) - } +#' +duncan <- function(geo_large = 'county', + geo_small = 'tract', + year = 2020, + subgroup, + subgroup_ref, + omit_NAs = TRUE, + quiet = FALSE, + ...) 
{ - # Grouping IDs for DI computation - if (geo_large == "tract") { - di_vars <- di_data %>% - dplyr::mutate(oid = paste(.$STATEFP, .$COUNTYFP, .$TRACTCE, sep = ""), - state = stringr::str_trim(state), - county = stringr::str_trim(county)) - } - if (geo_large == "county") { - di_vars <- di_data %>% - dplyr::mutate(oid = paste(.$STATEFP, .$COUNTYFP, sep = ""), - state = stringr::str_trim(state), - county = stringr::str_trim(county)) - } - if (geo_large == "state") { - di_vars <- di_data %>% - dplyr::mutate(oid = .$STATEFP, - state = stringr::str_trim(state)) - } - - # Count of racial/ethnic subgroup populations - ## Count of racial/ethnic comparison subgroup population - if (length(in_subgroup) == 1) { - di_vars <- di_vars %>% - dplyr::mutate(subgroup = .[ , in_subgroup]) - } else { - di_vars <- di_vars %>% - dplyr::mutate(subgroup = rowSums(.[ , in_subgroup])) - } - ## Count of racial/ethnic reference subgroup population - if (length(in_subgroup_ref) == 1) { - di_vars <- di_vars %>% - dplyr::mutate(subgroup_ref = .[ , in_subgroup_ref]) - } else { - di_vars <- di_vars %>% - dplyr::mutate(subgroup_ref = rowSums(.[ , in_subgroup_ref])) - } - - # Compute DI - ## From Duncan & Duncan (1955) https://doi.org/10.2307/2088328 - ## D_{jt} = 1/2 \sum_{i=1}^{k} | \frac{x_{ijt}}{X_{jt}}-\frac{y_{ijt}}{Y_{jt}}| - ## Where for k smaller geographies: - ## D_{jt} denotes the DI of larger geography j at time t - ## x_{ijt} denotes the racial/ethnic subgroup population of smaller geography i within larger geography j at time t - ## X_{jt} denotes the racial/ethnic subgroup population of larger geography j at time t - ## y_{ijt} denotes the racial/ethnic referent subgroup population of smaller geography i within larger geography j at time t - ## Y_{jt} denotes the racial/ethnic referent subgroup population of larger geography j at time t - - ## Compute - DItmp <- di_vars %>% - split(., f = list(di_vars$oid)) %>% - lapply(., FUN = di_fun, omit_NAs = omit_NAs) %>% - utils::stack(.) 
%>% - dplyr::mutate(DI = values, - oid = ind) %>% - dplyr::select(DI, oid) - - # Warning for missingness of census characteristics - missingYN <- di_vars[ , c(in_subgroup, in_subgroup_ref)] - names(missingYN) <- out_names - missingYN <- missingYN %>% - tidyr::pivot_longer(cols = dplyr::everything(), - names_to = "variable", - values_to = "val") %>% - dplyr::group_by(variable) %>% - dplyr::summarise(total = dplyr::n(), - n_missing = sum(is.na(val)), - percent_missing = paste0(round(mean(is.na(val)) * 100, 2), " %")) - - if (quiet == FALSE) { - # Warning for missing census data - if (sum(missingYN$n_missing) > 0) { - message("Warning: Missing census data") + # Check arguments + match.arg(geo_large, choices = c('state', 'county', 'tract', 'cbsa', 'csa', 'metro')) + match.arg(geo_small, choices = c('county', 'tract', 'block group')) + stopifnot(is.numeric(year), year >= 2009) # all variables available 2009 onward + match.arg( + subgroup, + several.ok = TRUE, + choices = c( + 'NHoL', + 'NHoLW', + 'NHoLB', + 'NHoLAIAN', + 'NHoLA', + 'NHoLNHOPI', + 'NHoLSOR', + 'NHoLTOMR', + 'NHoLTRiSOR', + 'NHoLTReSOR', + 'HoL', + 'HoLW', + 'HoLB', + 'HoLAIAN', + 'HoLA', + 'HoLNHOPI', + 'HoLSOR', + 'HoLTOMR', + 'HoLTRiSOR', + 'HoLTReSOR' + ) + ) + match.arg( + subgroup_ref, + several.ok = TRUE, + choices = c( + 'NHoL', + 'NHoLW', + 'NHoLB', + 'NHoLAIAN', + 'NHoLA', + 'NHoLNHOPI', + 'NHoLSOR', + 'NHoLTOMR', + 'NHoLTRiSOR', + 'NHoLTReSOR', + 'HoL', + 'HoLW', + 'HoLB', + 'HoLAIAN', + 'HoLA', + 'HoLNHOPI', + 'HoLSOR', + 'HoLTOMR', + 'HoLTRiSOR', + 'HoLTReSOR' + ) + ) + + # Select census variables + vars <- c( + NHoL = 'B03002_002', + NHoLW = 'B03002_003', + NHoLB = 'B03002_004', + NHoLAIAN = 'B03002_005', + NHoLA = 'B03002_006', + NHoLNHOPI = 'B03002_007', + NHoLSOR = 'B03002_008', + NHoLTOMR = 'B03002_009', + NHoLTRiSOR = 'B03002_010', + NHoLTReSOR = 'B03002_011', + HoL = 'B03002_012', + HoLW = 'B03002_013', + HoLB = 'B03002_014', + HoLAIAN = 'B03002_015', + HoLA = 'B03002_016', + HoLNHOPI 
= 'B03002_017', + HoLSOR = 'B03002_018', + HoLTOMR = 'B03002_019', + HoLTRiSOR = 'B03002_020', + HoLTReSOR = 'B03002_021' + ) + + selected_vars <- vars[c(subgroup, subgroup_ref)] + out_names <- names(selected_vars) # save for output + in_subgroup <- paste0(subgroup, 'E') + in_subgroup_ref <- paste0(subgroup_ref, 'E') + + # Acquire D variables and sf geometries + out_dat <- suppressMessages(suppressWarnings( + tidycensus::get_acs( + geography = geo_small, + year = year, + output = 'wide', + variables = selected_vars, + geometry = TRUE, + keep_geo_vars = TRUE, + ... + ) + )) + + # Format output + if (geo_small == 'county') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('county', 'state'), sep = ',') } + if (geo_small == 'tract') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('tract', 'county', 'state'), sep = ',') %>% + dplyr::mutate(tract = gsub('[^0-9\\.]', '', tract)) + } + if (geo_small == 'block group') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('block.group', 'tract', 'county', 'state'), sep = ',') %>% + dplyr::mutate( + tract = gsub('[^0-9\\.]', '', tract), + block.group = gsub('[^0-9\\.]', '', block.group) + ) + } + + # Grouping IDs for D computation + if (geo_large == 'state') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = STATEFP, + state = stringr::str_trim(state) + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'tract') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = paste0(STATEFP, COUNTYFP, TRACTCE), + state = stringr::str_trim(state), + county = stringr::str_trim(county) + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'county') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = paste0(STATEFP, COUNTYFP), + state = stringr::str_trim(state), + county = stringr::str_trim(county) + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'cbsa') { + stopifnot(is.numeric(year), year >= 2010) # CBSAs only available 2010 onward + lgeom <- 
suppressMessages(suppressWarnings(tigris::core_based_statistical_areas(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 3] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + cbsa = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 4] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'csa') { + stopifnot(is.numeric(year), year >= 2011) # CSAs only available 2011 onward + lgeom <- suppressMessages(suppressWarnings(tigris::combined_statistical_areas(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 2] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + csa = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 3] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'metro') { + stopifnot(is.numeric(year), year >= 2011) # Metro Divisions only available 2011 onward + lgeom <- suppressMessages(suppressWarnings(tigris::metro_divisions(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 4] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + metro = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 5] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) %>% + sf::st_drop_geometry() + } + + # Count of racial/ethnic subgroup populations + ## Count of racial/ethnic comparison subgroup population + if (length(in_subgroup) == 1) { + 
out_dat <- out_dat %>% + dplyr::mutate(subgroup = .[, in_subgroup]) + } else { + out_dat <- out_dat %>% + dplyr::mutate(subgroup = rowSums(.[, in_subgroup])) + } + ## Count of racial/ethnic reference subgroup population + if (length(in_subgroup_ref) == 1) { + out_dat <- out_dat %>% + dplyr::mutate(subgroup_ref = .[, in_subgroup_ref]) + } else { + out_dat <- out_dat %>% + dplyr::mutate(subgroup_ref = rowSums(.[, in_subgroup_ref])) + } + + # Compute D + ## From Duncan & Duncan (1955) https://doi.org/10.2307/2088328 + ## D_{jt} = 1/2 \sum_{i=1}^{k} | \frac{x_{ijt}}{X_{jt}}-\frac{y_{ijt}}{Y_{jt}}| + ## Where for k smaller geographies: + ## D_{jt} denotes the D of larger geography j at time t + ## x_{ijt} denotes the racial/ethnic subgroup population of smaller geography i within larger geography j at time t + ## X_{jt} denotes the racial/ethnic subgroup population of larger geography j at time t + ## y_{ijt} denotes the racial/ethnic referent subgroup population of smaller geography i within larger geography j at time t + ## Y_{jt} denotes the racial/ethnic referent subgroup population of larger geography j at time t + + ## Compute + out_tmp <- out_dat %>% + split(., f = list(out_dat$oid)) %>% + lapply(., FUN = d_fun, omit_NAs = omit_NAs) %>% + utils::stack(.) 
%>% + dplyr::mutate( + D = values, + oid = ind + ) %>% + dplyr::select(D, oid) + + # Warning for missingness of census characteristics + missingYN <- out_dat[, c(in_subgroup, in_subgroup_ref)] + names(missingYN) <- out_names + missingYN <- missingYN %>% + tidyr::pivot_longer( + cols = dplyr::everything(), + names_to = 'variable', + values_to = 'val' + ) %>% + dplyr::group_by(variable) %>% + dplyr::summarise( + total = dplyr::n(), + n_missing = sum(is.na(val)), + percent_missing = paste0(round(mean(is.na(val)) * 100, 2), ' %') + ) + + if (quiet == FALSE) { + # Warning for missing census data + if (sum(missingYN$n_missing) > 0) { + message('Warning: Missing census data') + } + } + + # Format output + if (geo_large == 'state') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, state, D) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, state, D) %>% + .[.$GEOID != 'NANA',] + } + if (geo_large == 'county') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, state, county, D) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, state, county, D) %>% + .[.$GEOID != 'NANA',] + } + if (geo_large == 'tract') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, state, county, tract, D) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, state, county, tract, D) %>% + .[.$GEOID != 'NANA',] + } + if (geo_large == 'cbsa') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, cbsa, D) %>% + unique(.) 
%>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, cbsa, D) %>% + .[.$GEOID != 'NANA', ] %>% + dplyr::distinct(GEOID, .keep_all = TRUE) %>% + dplyr::filter(stats::complete.cases(.)) + } + if (geo_large == 'csa') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, csa, D) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, csa, D) %>% + .[.$GEOID != 'NANA', ] %>% + dplyr::distinct(GEOID, .keep_all = TRUE) %>% + dplyr::filter(stats::complete.cases(.)) + } + if (geo_large == 'metro') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, metro, D) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, metro, D) %>% + .[.$GEOID != 'NANA', ] %>% + dplyr::distinct(GEOID, .keep_all = TRUE) %>% + dplyr::filter(stats::complete.cases(.)) + } + + out <- out %>% + dplyr::arrange(GEOID) %>% + dplyr::as_tibble() + + out_dat <- out_dat %>% + dplyr::arrange(GEOID) %>% + dplyr::as_tibble() + + out <- list(d = out, d_data = out_dat, missing = missingYN) + + return(out) } - - # Format output - if (geo_large == "state") { - di <- merge(di_vars, DItmp) %>% - dplyr::select(oid, state, DI) %>% - unique(.) %>% - dplyr::mutate(GEOID = oid) %>% - dplyr::select(GEOID, state, DI) %>% - .[.$GEOID != "NANA", ] - } - if (geo_large == "county") { - di <- merge(di_vars, DItmp) %>% - dplyr::select(oid, state, county, DI) %>% - unique(.) %>% - dplyr::mutate(GEOID = oid) %>% - dplyr::select(GEOID, state, county, DI) %>% - .[.$GEOID != "NANA", ] - } - if (geo_large == "tract") { - di <- merge(di_vars, DItmp) %>% - dplyr::select(oid, state, county, tract, DI) %>% - unique(.) 
%>% - dplyr::mutate(GEOID = oid) %>% - dplyr::select(GEOID, state, county, tract, DI) %>% - .[.$GEOID != "NANA", ] - } - - di <- di %>% - dplyr::arrange(GEOID) %>% - dplyr::as_tibble() - - di_data <- di_data %>% - dplyr::arrange(GEOID) %>% - dplyr::as_tibble() - - out <- list(di = di, - di_data = di_data, - missing = missingYN) - - return(out) -} diff --git a/R/gini.R b/R/gini.R index 2aeca13..299c54f 100644 --- a/R/gini.R +++ b/R/gini.R @@ -1,107 +1,119 @@ -#' Gini Index based on Gini (1921) -#' +#' Gini Index based on Gini (1921) +#' #' Retrieve the aspatial Gini Index of income inequality. #' -#' @param geo Character string specifying the geography of the data either census tracts \code{geo = "tract"} (the default) or counties \code{geo = "county"}. +#' @param geo Character string specifying the geography of the data either census tracts \code{geo = 'tract'} (the default) or counties \code{geo = 'county'}. #' @param year Numeric. The year to compute the estimate. The default is 2020, and the years 2009 onward are currently available. #' @param quiet Logical. If TRUE, will display messages about potential missing census information #' @param ... Arguments passed to \code{\link[tidycensus]{get_acs}} to select state, county, and other arguments for census characteristics #' -#' @details This function will retrieve the aspatial Gini Index of U.S. census tracts or counties for a specified geographical extent (e.g., the entire U.S. or a single state) based on Gini (1921) \doi{10.2307/2223319}. -#' -#' The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey estimates of the Gini Index for income inequality (ACS: B19083). The estimates are available for 2009 onward when ACS-5 data are available but are available from other U.S. Census Bureau surveys. -#' +#' @details This function will retrieve the aspatial Gini Index (\emph{G}) of U.S. 
census tracts or counties for a specified geographical extent (e.g., the entire U.S. or a single state) based on Gini (1921) \doi{10.2307/2223319}. +#' +#' The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey estimates of \emph{G} for income inequality (ACS: B19083). The estimates are available for 2009 onward when ACS-5 data are available but are available from other U.S. Census Bureau surveys. +#' #' Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. -#' -#' According to the U.S. Census Bureau \url{https://www.census.gov/topics/income-poverty/income-inequality/about/metrics/gini-index.html}: "The Gini Index is a summary measure of income inequality. The Gini coefficient incorporates the detailed shares data into a single statistic, which summarizes the dispersion of income across the entire income distribution. The Gini coefficient ranges from 0, indicating perfect equality (where everyone receives an equal share), to 1, perfect inequality (where only one recipient or group of recipients receives all the income). The Gini is based on the difference between the Lorenz curve (the observed cumulative income distribution) and the notion of a perfectly equal income distribution." -#' +#' +#' According to the U.S. Census Bureau \url{https://www.census.gov/topics/income-poverty/income-inequality/about/metrics/gini-index.html}: 'The Gini Index is a summary measure of income inequality. The Gini coefficient incorporates the detailed shares data into a single statistic, which summarizes the dispersion of income across the entire income distribution. The Gini coefficient ranges from 0, indicating perfect equality (where everyone receives an equal share), to 1, perfect inequality (where only one recipient or group of recipients receives all the income). 
The Gini is based on the difference between the Lorenz curve (the observed cumulative income distribution) and the notion of a perfectly equal income distribution.' +#' #' @return An object of class 'list'. This is a named list with the following components: -#' +#' #' \describe{ -#' \item{\code{gini}}{An object of class 'tbl' for the GEOID, name, and Gini index of specified census geographies.} -#' \item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for the Gini index.} +#' \item{\code{g}}{An object of class 'tbl' for the GEOID, name, and \emph{G} of specified census geographies.} +#' \item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for \emph{G}.} #' } -#' +#' #' @import dplyr #' @importFrom stringr str_trim #' @importFrom tidycensus get_acs #' @importFrom tidyr pivot_longer separate #' @export -#' +#' #' @seealso \code{\link[tidycensus]{get_acs}} for additional arguments for geographic extent selection (i.e., \code{state} and \code{county}). #' #' @examples #' \dontrun{ #' # Wrapped in \dontrun{} because these examples require a Census API key. -#' -#' # Tract-level metric (2020) -#' gini(geo = "tract", state = "GA", year = 2020) -#' -#' # County-level metric (2020) -#' gini(geo = "county", state = "GA", year = 2020) -#' +#' +#' # Gini Index of income inequality +#' ## of census tracts within Georgia, U.S.A., counties (2020) +#' gini(geo = 'tract', state = 'GA', year = 2020) +#' #' } -#' -gini <- function(geo = "tract", year = 2020, quiet = FALSE, ...) { +#' +gini <- function(geo = 'tract', + year = 2020, + quiet = FALSE, + ...) 
{ # Check arguments - match.arg(geo, choices = c("county", "tract")) + match.arg(geo, choices = c('county', 'tract')) stopifnot(is.numeric(year), year >= 2009) # the gini variable is available before and after 2009 but constrained for consistency with out indices (for now) # Select census variable - vars <- c(gini = "B19083_001") + vars <- c(gini = 'B19083_001') # Acquire Gini Index - gini_vars <- suppressMessages(suppressWarnings(tidycensus::get_acs(geography = geo, - year = year, - output = "wide", - variables = vars, ...))) - - if (geo == "tract") { - gini_vars <- gini_vars %>% - tidyr::separate(NAME, into = c("tract", "county", "state"), sep = ",") %>% - dplyr::mutate(tract = gsub("[^0-9\\.]","", tract)) + tmp_dat <- suppressMessages(suppressWarnings( + tidycensus::get_acs( + geography = geo, + year = year, + output = 'wide', + variables = vars, + ... + ) + )) + + if (geo == 'tract') { + tmp_dat <- tmp_dat %>% + tidyr::separate(NAME, into = c('tract', 'county', 'state'), sep = ',') %>% + dplyr::mutate(tract = gsub('[^0-9\\.]', '', tract)) } else { - gini_vars <- gini_vars %>% tidyr::separate(NAME, into = c("county", "state"), sep = ",") + tmp_dat <- tmp_dat %>% + tidyr::separate(NAME, into = c('county', 'state'), sep = ',') } - gini_vars <- gini_vars %>% - dplyr::mutate(gini = giniE) + tmp_dat <- tmp_dat %>% + dplyr::mutate(G = giniE) # Warning for missingness of census characteristics - missingYN <- gini_vars %>% - dplyr::select(gini) %>% - tidyr::pivot_longer(cols = dplyr::everything(), - names_to = "variable", - values_to = "val") %>% + missingYN <- tmp_dat %>% + dplyr::select(G) %>% + tidyr::pivot_longer( + cols = dplyr::everything(), + names_to = 'variable', + values_to = 'val' + ) %>% dplyr::group_by(variable) %>% - dplyr::summarise(total = dplyr::n(), - n_missing = sum(is.na(val)), - percent_missing = paste0(round(mean(is.na(val)) * 100, 2), " %")) + dplyr::summarise( + total = dplyr::n(), + n_missing = sum(is.na(val)), + percent_missing = 
paste0(round(mean(is.na(val)) * 100, 2), ' %') + ) if (quiet == FALSE) { # Warning for missing census data if (sum(missingYN$n_missing) > 0) { - message("Warning: Missing census data") + message('Warning: Missing census data') } } - if (geo == "tract") { - gini <- gini_vars %>% - dplyr::select(GEOID, state, county, tract, gini) + if (geo == 'tract') { + out <- tmp_dat %>% + dplyr::select(GEOID, state, county, tract, G) } else { - gini <- gini_vars %>% - dplyr::select(GEOID, state, county, gini) + out <- tmp_dat %>% + dplyr::select(GEOID, state, county, G) } - gini <- gini %>% - dplyr::mutate(state = stringr::str_trim(state), - county = stringr::str_trim(county)) %>% + out <- out %>% + dplyr::mutate( + state = stringr::str_trim(state), + county = stringr::str_trim(county) + ) %>% dplyr::arrange(GEOID) %>% - dplyr::as_tibble() + dplyr::as_tibble() - out <- list(gini = gini, - missing = missingYN) + out <- list(g = out, missing = missingYN) return(out) } diff --git a/R/globals.R b/R/globals.R index f533fea..2c0cd46 100644 --- a/R/globals.R +++ b/R/globals.R @@ -1,43 +1,263 @@ -globalVariables(c("CWD", "EDU", "EMP", "FHH", "GEOID", "MedHHInc", "MedHHIncE", "MedHomeVal", "MedHomeValE", "NAME", - "NDI", "OCC", "PC1", "POV", "PUB", "PctCrwdHH_denE", "PctCrwdHH_num1E", "PctCrwdHH_num2E", - "PctCrwdHH_num3E", "PctCrwdHH_num4E", "PctCrwdHH_num5E", "PctCrwdHH_num6E", - "PctEducBchPlus", "PctEducHSPlus", "PctEducLTBch", "PctEducLTBchZ", "PctEducLTHS", - "PctEducLTHSZ", "PctEducLessThanHS_denE", "PctEducLessThanHS_numE", - "PctEduc_den25upE", "PctEduc_num25upADE", "PctEduc_num25upBDE", - "PctEduc_num25upGDE", "PctEduc_num25upHSE", "PctEduc_num25upSCE", - "PctFamBelowPov", "PctFamBelowPovE", "PctFamBelowPovZ", "PctFemHeadKids", - "PctFemHeadKidsZ", "PctFemHeadKids_denE", "PctFemHeadKids_num1E", - "PctFemHeadKids_num2E", "PctHHPov_denE", "PctHHPov_numE", "PctHHUnder30K_denE", - "PctHHUnder30K_num1E", "PctHHUnder30K_num2E", "PctHHUnder30K_num3E", - "PctHHUnder30K_num4E", 
"PctHHUnder30K_num5E", "PctMenMgmtBusScArti_denE", - "PctMenMgmtBusScArti_num1E", "PctMenMgmtBusScArti_num2E", "PctMgmtBusScArti", - "PctMgmtBusScArti_denE", "PctMgmtBusScArti_numE", "PctNComPlmb", "PctNComPlmbE", - "PctNComPlmbZ", "PctNoIDR", "PctNoIDRZ", "PctNoPhone", "PctNoPhoneE", "PctNoPhoneZ", - "PctNotOwnerOcc", "PctNotOwnerOccZ", "PctOwnerOcc", "PctOwnerOccE", "PctPubAsst", - "PctPubAsstZ", "PctPubAsst_denE", "PctPubAsst_numE", "PctRecvIDR", - "PctRecvIDR_denE", "PctRecvIDR_numE", "PctUnemp_1619FE", "PctUnemp_1619ME", - "PctUnemp_2021FE", "PctUnemp_2021ME", "PctUnemp_2224FE", "PctUnemp_2224ME", - "PctUnemp_2529FE", "PctUnemp_2529ME", "PctUnemp_4554FE", "PctUnemp_4554ME", - "PctUnemp_5559FE", "PctUnemp_5559ME", "PctUnemp_6061FE", "PctUnemp_6061ME", - "PctUnemp_6264FE", "PctUnemp_6264ME", "PctUnemp_6569FE", "PctUnemp_6569ME", - "PctUnemp_7074FE", "PctUnemp_7074ME", "PctUnemp_75upME", "PctUnemp_denE", - "PctUnemp_numE", "PctUnempl", "PctUnemplE", "PctUnemplZ", "PctWorkClass", - "PctWorkClassZ", "TotalPop", "TotalPopulationE", "U30", "county", "logMedHHInc", - "logMedHomeVal", "percent", "state", "total", "tract", "val", "variable", "giniE", - "A_edu", "A_inc", "A_wbinc", "A_wpcinc", "B100125i", "B100125iE", "B100125nhw", - "B100125nhwE", "B1015bih", "B1015bihE", "B1015i", "B1015iE", "B1015nhw", "B1015nhwE", - "B125150i", "B125150iE", "B125150nhw", "B125150nhwE", "B150200hw", "B150200i", - "B150200iE", "B150200nhw", "B150200nhwE", "B1520bih", "B1520bihE", "B1520i", "B1520iE", - "B1520nhw", "B1520nhwE", "B2025bih", "B2025bihE", "B2025i", "B2025iE", "B2025nhw", - "B2025nhwE", "B2530bih", "B2530bihE", "B2530i", "B2530iE", "B2530nhw", "B2530nhwE", - "ICE_edu", "ICE_inc", "ICE_rewb", "ICE_wbinc", "ICE_wpcinc", "NHoLB", "NHoLBE", "NHoLW", - "NHoLWE", "O200i", "O200iE", "O200nhw", "O200nhwE", "O25F10G", "O25F10GE", "O25F11G", - "O25F11GE", "O25F12GND", "O25F12GNDE", "O25F5t6G", "O25F5t6GE", "O25F7t8G", "O25F7t8GE", - "O25F9G", "O25F9GE", "O25FBD", "O25FBDE", "O25FDD", 
"O25FDDE", "O25FMD", "O25FMDE", "O25FNSC", - "O25FNSCE", "O25FNt4G", "O25FNt4GE", "O25FPSD", "O25FPSDE", "O25M10G", "O25M10GE", "O25M11G", - "O25M11GE", "O25M12GND", "O25M12GNDE", "O25M5t6G", "O25M5t6GE", "O25M7t8G", "O25M7t8GE", - "O25M9G", "O25M9GE", "O25MBD", "O25MBDE", "O25MDD", "O25MDDE", "O25MMD", "O25MMDE", "O25MNSC", - "O25MNSCE", "O25MNt4G", "O25MNt4GE", "O25MPSD", "O25MPSDE", "P_edu", "P_inc", "P_wbinc", - "P_wpcinc", "TotalPop_edu", "TotalPop_inc", "TotalPop_re", "TotalPopeduE", - "TotalPopiE", "TotalPopreE", "U10bih", "U10bihE", "U10i", "U10iE", "U10nhw", "U10nhwE", "NAME.y", - ".", "values", "ind", "oid", "block.group", "DI", "AI")) +globalVariables( + c( + 'CWD', + 'EDU', + 'EMP', + 'FHH', + 'GEOID', + 'MedHHInc', + 'MedHHIncE', + 'MedHomeVal', + 'MedHomeValE', + 'NAME', + 'NDI', + 'OCC', + 'PC1', + 'POV', + 'PUB', + 'PctCrwdHH_denE', + 'PctCrwdHH_num1E', + 'PctCrwdHH_num2E', + 'PctCrwdHH_num3E', + 'PctCrwdHH_num4E', + 'PctCrwdHH_num5E', + 'PctCrwdHH_num6E', + 'PctEducBchPlus', + 'PctEducHSPlus', + 'PctEducLTBch', + 'PctEducLTBchZ', + 'PctEducLTHS', + 'PctEducLTHSZ', + 'PctEducLessThanHS_denE', + 'PctEducLessThanHS_numE', + 'PctEduc_den25upE', + 'PctEduc_num25upADE', + 'PctEduc_num25upBDE', + 'PctEduc_num25upGDE', + 'PctEduc_num25upHSE', + 'PctEduc_num25upSCE', + 'PctFamBelowPov', + 'PctFamBelowPovE', + 'PctFamBelowPovZ', + 'PctFemHeadKids', + 'PctFemHeadKidsZ', + 'PctFemHeadKids_denE', + 'PctFemHeadKids_num1E', + 'PctFemHeadKids_num2E', + 'PctHHPov_denE', + 'PctHHPov_numE', + 'PctHHUnder30K_denE', + 'PctHHUnder30K_num1E', + 'PctHHUnder30K_num2E', + 'PctHHUnder30K_num3E', + 'PctHHUnder30K_num4E', + 'PctHHUnder30K_num5E', + 'PctMenMgmtBusScArti_denE', + 'PctMenMgmtBusScArti_num1E', + 'PctMenMgmtBusScArti_num2E', + 'PctMgmtBusScArti', + 'PctMgmtBusScArti_denE', + 'PctMgmtBusScArti_numE', + 'PctNComPlmb', + 'PctNComPlmbE', + 'PctNComPlmbZ', + 'PctNoIDR', + 'PctNoIDRZ', + 'PctNoPhone', + 'PctNoPhoneE', + 'PctNoPhoneZ', + 'PctNotOwnerOcc', + 
'PctNotOwnerOccZ', + 'PctOwnerOcc', + 'PctOwnerOccE', + 'PctPubAsst', + 'PctPubAsstZ', + 'PctPubAsst_denE', + 'PctPubAsst_numE', + 'PctRecvIDR', + 'PctRecvIDR_denE', + 'PctRecvIDR_numE', + 'PctUnemp_1619FE', + 'PctUnemp_1619ME', + 'PctUnemp_2021FE', + 'PctUnemp_2021ME', + 'PctUnemp_2224FE', + 'PctUnemp_2224ME', + 'PctUnemp_2529FE', + 'PctUnemp_2529ME', + 'PctUnemp_4554FE', + 'PctUnemp_4554ME', + 'PctUnemp_5559FE', + 'PctUnemp_5559ME', + 'PctUnemp_6061FE', + 'PctUnemp_6061ME', + 'PctUnemp_6264FE', + 'PctUnemp_6264ME', + 'PctUnemp_6569FE', + 'PctUnemp_6569ME', + 'PctUnemp_7074FE', + 'PctUnemp_7074ME', + 'PctUnemp_75upME', + 'PctUnemp_denE', + 'PctUnemp_numE', + 'PctUnempl', + 'PctUnemplE', + 'PctUnemplZ', + 'PctWorkClass', + 'PctWorkClassZ', + 'TotalPop', + 'TotalPopulationE', + 'U30', + 'county', + 'logMedHHInc', + 'logMedHomeVal', + 'percent', + 'state', + 'total', + 'tract', + 'STATEFP', + 'COUNTYFP', + 'TRACTCE', + 'cbsa', + 'csa', + 'metro', + 'val', + 'variable', + 'giniE', + 'A_edu', + 'A_inc', + 'A_wbinc', + 'A_wpcinc', + 'B100125i', + 'B100125iE', + 'B100125nhw', + 'B100125nhwE', + 'B1015bih', + 'B1015bihE', + 'B1015i', + 'B1015iE', + 'B1015nhw', + 'B1015nhwE', + 'B125150i', + 'B125150iE', + 'B125150nhw', + 'B125150nhwE', + 'B150200hw', + 'B150200i', + 'B150200iE', + 'B150200nhw', + 'B150200nhwE', + 'B1520bih', + 'B1520bihE', + 'B1520i', + 'B1520iE', + 'B1520nhw', + 'B1520nhwE', + 'B2025bih', + 'B2025bihE', + 'B2025i', + 'B2025iE', + 'B2025nhw', + 'B2025nhwE', + 'B2530bih', + 'B2530bihE', + 'B2530i', + 'B2530iE', + 'B2530nhw', + 'B2530nhwE', + 'ICE_edu', + 'ICE_inc', + 'ICE_rewb', + 'ICE_wbinc', + 'ICE_wpcinc', + 'NHoLB', + 'NHoLBE', + 'NHoLW', + 'NHoLWE', + 'O200i', + 'O200iE', + 'O200nhw', + 'O200nhwE', + 'O25F10G', + 'O25F10GE', + 'O25F11G', + 'O25F11GE', + 'O25F12GND', + 'O25F12GNDE', + 'O25F5t6G', + 'O25F5t6GE', + 'O25F7t8G', + 'O25F7t8GE', + 'O25F9G', + 'O25F9GE', + 'O25FBD', + 'O25FBDE', + 'O25FDD', + 'O25FDDE', + 'O25FMD', + 'O25FMDE', + 'O25FNSC', + 
'O25FNSCE', + 'O25FNt4G', + 'O25FNt4GE', + 'O25FPSD', + 'O25FPSDE', + 'O25M10G', + 'O25M10GE', + 'O25M11G', + 'O25M11GE', + 'O25M12GND', + 'O25M12GNDE', + 'O25M5t6G', + 'O25M5t6GE', + 'O25M7t8G', + 'O25M7t8GE', + 'O25M9G', + 'O25M9GE', + 'O25MBD', + 'O25MBDE', + 'O25MDD', + 'O25MDDE', + 'O25MMD', + 'O25MMDE', + 'O25MNSC', + 'O25MNSCE', + 'O25MNt4G', + 'O25MNt4GE', + 'O25MPSD', + 'O25MPSDE', + 'P_edu', + 'P_inc', + 'P_wbinc', + 'P_wpcinc', + 'TotalPop_edu', + 'TotalPop_inc', + 'TotalPop_re', + 'TotalPopeduE', + 'TotalPopiE', + 'TotalPopreE', + 'U10bih', + 'U10bihE', + 'U10i', + 'U10iE', + 'U10nhw', + 'U10nhwE', + 'NAME.y', + '.', + 'values', + 'ind', + 'oid', + 'block.group', + 'V', + 'LQ', + 'LExIs', + 'DEL', + 'SP', + 'km', + 'A', + 'D', + 'G', + 'xPx_star', + 'xPy_star' + ) +) diff --git a/R/hoover.R b/R/hoover.R new file mode 100644 index 0000000..2146a3c --- /dev/null +++ b/R/hoover.R @@ -0,0 +1,383 @@ +#' Delta based on Hoover (1941) and Duncan et al. (1961) +#' +#' Compute the aspatial Delta (Hoover) of a selected racial/ethnic subgroup(s) and U.S. geographies. +#' +#' @param geo_large Character string specifying the larger geographical unit of the data. The default is counties \code{geo_large = 'county'}. +#' @param geo_small Character string specifying the smaller geographical unit of the data. The default is census tracts \code{geo_small = 'tract'}. +#' @param year Numeric. The year to compute the estimate. The default is 2020, and the years 2009 onward are currently available. +#' @param subgroup Character string specifying the racial/ethnic subgroup(s). See Details for available choices. +#' @param omit_NAs Logical. If FALSE, will compute index for a larger geographical unit only if all of its smaller geographical units have values. The default is TRUE. +#' @param quiet Logical. If TRUE, will suppress messages about potential missing census information. The default is FALSE. +#' @param ...
Arguments passed to \code{\link[tidycensus]{get_acs}} to select state, county, and other arguments for census characteristics +#' +#' @details This function will compute the aspatial Delta (\emph{DEL}) of selected racial/ethnic subgroups and U.S. geographies for a specified geographical extent (e.g., the entire U.S. or a single state) based on Hoover (1941) \doi{10.1017/S0022050700052980} and Duncan, Cuzzort, and Duncan (1961; LC:60007089). This function provides the computation of \emph{DEL} for any of the U.S. Census Bureau race/ethnicity subgroups (including Hispanic and non-Hispanic individuals). +#' +#' The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the aspatial computation. The yearly estimates are available for 2009 onward when ACS-5 data are available (2010 onward for \code{geo_large = 'cbsa'} and 2011 onward for \code{geo_large = 'csa'} or \code{geo_large = 'metro'}) but may be available from other U.S. Census Bureau surveys. The twenty racial/ethnic subgroups (U.S. 
Census Bureau definitions) are: +#' \itemize{ +#' \item \strong{B03002_002}: not Hispanic or Latino \code{'NHoL'} +#' \item \strong{B03002_003}: not Hispanic or Latino, white alone \code{'NHoLW'} +#' \item \strong{B03002_004}: not Hispanic or Latino, Black or African American alone \code{'NHoLB'} +#' \item \strong{B03002_005}: not Hispanic or Latino, American Indian and Alaska Native alone \code{'NHoLAIAN'} +#' \item \strong{B03002_006}: not Hispanic or Latino, Asian alone \code{'NHoLA'} +#' \item \strong{B03002_007}: not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'NHoLNHOPI'} +#' \item \strong{B03002_008}: not Hispanic or Latino, Some other race alone \code{'NHoLSOR'} +#' \item \strong{B03002_009}: not Hispanic or Latino, Two or more races \code{'NHoLTOMR'} +#' \item \strong{B03002_010}: not Hispanic or Latino, Two races including Some other race \code{'NHoLTRiSOR'} +#' \item \strong{B03002_011}: not Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'NHoLTReSOR'} +#' \item \strong{B03002_012}: Hispanic or Latino \code{'HoL'} +#' \item \strong{B03002_013}: Hispanic or Latino, white alone \code{'HoLW'} +#' \item \strong{B03002_014}: Hispanic or Latino, Black or African American alone \code{'HoLB'} +#' \item \strong{B03002_015}: Hispanic or Latino, American Indian and Alaska Native alone \code{'HoLAIAN'} +#' \item \strong{B03002_016}: Hispanic or Latino, Asian alone \code{'HoLA'} +#' \item \strong{B03002_017}: Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'HoLNHOPI'} +#' \item \strong{B03002_018}: Hispanic or Latino, Some other race alone \code{'HoLSOR'} +#' \item \strong{B03002_019}: Hispanic or Latino, Two or more races \code{'HoLTOMR'} +#' \item \strong{B03002_020}: Hispanic or Latino, Two races including Some other race \code{'HoLTRiSOR'} +#' \item \strong{B03002_021}: Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'HoLTReSOR'} 
+#' } +#' +#' Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. +#' +#' \emph{DEL} is a measure of the proportion of members of one subgroup(s) residing in geographic units with above average density of members of the subgroup(s). The index provides the proportion of a subgroup population that would have to move across geographic units to achieve a uniform density. \emph{DEL} can range in value from 0 to 1. +#' +#' Larger geographies available include state \code{geo_large = 'state'}, county \code{geo_large = 'county'}, census tract \code{geo_large = 'tract'}, Core Based Statistical Area \code{geo_large = 'cbsa'}, Combined Statistical Area \code{geo_large = 'csa'}, and Metropolitan Division \code{geo_large = 'metro'} levels. Smaller geographies available include county \code{geo_small = 'county'}, census tract \code{geo_small = 'tract'}, and census block group \code{geo_small = 'block group'} levels. If a larger geographical area is comprised of only one smaller geographical area (e.g., a U.S. county contains only one census tract), then the \emph{DEL} value returned is NA. If the larger geographical unit is Combined Statistical Areas \code{geo_large = 'csa'}, Core Based Statistical Areas \code{geo_large = 'cbsa'}, or Metropolitan Divisions \code{geo_large = 'metro'}, only the smaller geographical units completely within a larger geographical unit are considered in the \emph{DEL} computation (see internal \code{\link[sf]{st_within}} function for more information). We recommend specifying all states within which the larger geographical unit of interest is located using the internal \code{state} argument to ensure all appropriate smaller geographical units are included in the \emph{DEL} computation. +#' +#' @return An object of class 'list'.
This is a named list with the following components: +#' +#' \describe{ +#' \item{\code{del}}{An object of class 'tbl' for the GEOID, name, and \emph{DEL} at specified larger census geographies.} +#' \item{\code{del_data}}{An object of class 'tbl' for the raw census values at specified smaller census geographies.} +#' \item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute \emph{DEL}.} +#' } +#' +#' @import dplyr +#' @importFrom sf st_drop_geometry st_within +#' @importFrom stats complete.cases +#' @importFrom tidycensus get_acs +#' @importFrom tidyr pivot_longer separate +#' @importFrom tigris combined_statistical_areas core_based_statistical_areas metro_divisions +#' @importFrom utils stack +#' @export +#' +#' @seealso \code{\link[tidycensus]{get_acs}} for additional arguments for geographic extent selection (i.e., \code{state} and \code{county}). +#' +#' @examples +#' \dontrun{ +#' # Wrapped in \dontrun{} because these examples require a Census API key. +#' +#' # Delta (a measure of concentration) of non-Hispanic Black populations +#' ## of census tracts within counties within Georgia, U.S.A., counties (2020) +#' hoover( +#' geo_large = 'county', +#' geo_small = 'tract', +#' state = 'GA', +#' year = 2020, +#' subgroup = 'NHoLB' +#' ) +#' +#' } +#' +hoover <- function(geo_large = 'county', + geo_small = 'tract', + year = 2020, + subgroup, + omit_NAs = TRUE, + quiet = FALSE, + ...) 
{ + + # Check arguments + match.arg(geo_large, choices = c('state', 'county', 'tract', 'cbsa', 'csa', 'metro')) + match.arg(geo_small, choices = c('county', 'tract', 'block group')) + stopifnot(is.numeric(year), year >= 2009) # all variables available 2009 onward + match.arg( + subgroup, + several.ok = TRUE, + choices = c( + 'NHoL', + 'NHoLW', + 'NHoLB', + 'NHoLAIAN', + 'NHoLA', + 'NHoLNHOPI', + 'NHoLSOR', + 'NHoLTOMR', + 'NHoLTRiSOR', + 'NHoLTReSOR', + 'HoL', + 'HoLW', + 'HoLB', + 'HoLAIAN', + 'HoLA', + 'HoLNHOPI', + 'HoLSOR', + 'HoLTOMR', + 'HoLTRiSOR', + 'HoLTReSOR' + ) + ) + + # Select census variables + vars <- c( + TotalPop = 'B03002_001', + NHoL = 'B03002_002', + NHoLW = 'B03002_003', + NHoLB = 'B03002_004', + NHoLAIAN = 'B03002_005', + NHoLA = 'B03002_006', + NHoLNHOPI = 'B03002_007', + NHoLSOR = 'B03002_008', + NHoLTOMR = 'B03002_009', + NHoLTRiSOR = 'B03002_010', + NHoLTReSOR = 'B03002_011', + HoL = 'B03002_012', + HoLW = 'B03002_013', + HoLB = 'B03002_014', + HoLAIAN = 'B03002_015', + HoLA = 'B03002_016', + HoLNHOPI = 'B03002_017', + HoLSOR = 'B03002_018', + HoLTOMR = 'B03002_019', + HoLTRiSOR = 'B03002_020', + HoLTReSOR = 'B03002_021' + ) + + selected_vars <- vars[subgroup] + out_names <- c(names(selected_vars), 'ALAND') # save for output + in_subgroup <- paste0(subgroup, 'E') + + # Acquire DEL variables and sf geometries + out_dat <- suppressMessages(suppressWarnings( + tidycensus::get_acs( + geography = geo_small, + year = year, + output = 'wide', + variables = selected_vars, + geometry = TRUE, + keep_geo_vars = TRUE, + ... 
+ ) + )) + + # Format output + if (geo_small == 'county') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('county', 'state'), sep = ',') + } + if (geo_small == 'tract') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('tract', 'county', 'state'), sep = ',') %>% + dplyr::mutate(tract = gsub('[^0-9\\.]', '', tract)) + } + if (geo_small == 'block group') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('block.group', 'tract', 'county', 'state'), sep = ',') %>% + dplyr::mutate( + tract = gsub('[^0-9\\.]', '', tract), block.group = gsub('[^0-9\\.]', '', block.group) + ) + } + + # Grouping IDs for DEL computation + if (geo_large == 'state') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = STATEFP, + state = stringr::str_trim(state) + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'county') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = paste0(STATEFP, COUNTYFP), + state = stringr::str_trim(state), + county = stringr::str_trim(county) + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'tract') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = paste0(STATEFP, COUNTYFP, TRACTCE), + state = stringr::str_trim(state), + county = stringr::str_trim(county) + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'cbsa') { + stopifnot(is.numeric(year), year >= 2010) # CBSAs only available 2010 onward + lgeom <- suppressMessages(suppressWarnings(tigris::core_based_statistical_areas(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 3] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + cbsa = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 4] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'csa') { + stopifnot(is.numeric(year), year >= 2011) # CSAs 
only available 2011 onward + lgeom <- suppressMessages(suppressWarnings(tigris::combined_statistical_areas(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 2] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + csa = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 3] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'metro') { + stopifnot(is.numeric(year), year >= 2011) # Metro Divisions only available 2011 onward + lgeom <- suppressMessages(suppressWarnings(tigris::metro_divisions(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 4] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + metro = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 5] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) %>% + sf::st_drop_geometry() + } + + # Count of racial/ethnic subgroup populations + ## Count of racial/ethnic comparison subgroup population + if (length(in_subgroup) == 1) { + out_dat <- out_dat %>% + dplyr::mutate(subgroup = .[ , in_subgroup]) + } else { + out_dat <- out_dat %>% + dplyr::mutate(subgroup = rowSums(.[ , in_subgroup])) + } + + # Compute DEL + ## From Hoover (1941) https://doi.org/10.1017/S0022050700052980 + ## 0.5\sum_{i=1}^{n}\left|\frac{x_{i}}{X}-\frac{a_{i}}{A}\right| + ## Where for n geographical units i: + ## X denotes the total number of subgroup population in study (reference) area + ## x_{i} denotes the number of subgroup population X in geographical unit i + ## A denotes the total land area in study (reference) area (sum of all a_{i}) + ## a_{i} denotes the land area of
geographical unit i + + ## Compute + out_tmp <- out_dat %>% + split(., f = list(out_dat$oid)) %>% + lapply(., FUN = del_fun, omit_NAs = omit_NAs) %>% + utils::stack(.) %>% + dplyr::mutate(DEL = values, oid = ind) %>% + dplyr::select(DEL, oid) + + # Warning for missingness of census characteristics + missingYN <- out_dat[ , c(in_subgroup, 'ALAND')] + names(missingYN) <- out_names + missingYN <- missingYN %>% + tidyr::pivot_longer(cols = dplyr::everything(), names_to = 'variable', values_to = 'val') %>% + dplyr::group_by(variable) %>% + dplyr::summarise( + total = dplyr::n(), + n_missing = sum(is.na(val)), + percent_missing = paste0(round(mean(is.na(val)) * 100, 2), ' %') + ) + + if (quiet == FALSE) { + # Warning for missing census data + if (sum(missingYN$n_missing) > 0) { + message('Warning: Missing census data') + } + } + + # Format output + if (geo_large == 'state') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, state, DEL) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, state, DEL) %>% + .[.$GEOID != 'NANA', ] + } + if (geo_large == 'county') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, state, county, DEL) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, state, county, DEL) %>% + .[.$GEOID != 'NANA', ] + } + if (geo_large == 'tract') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, state, county, tract, DEL) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, state, county, tract, DEL) %>% + .[.$GEOID != 'NANA', ] + } + if (geo_large == 'cbsa') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, cbsa, DEL) %>% + unique(.) 
%>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, cbsa, DEL) %>% + .[.$GEOID != 'NANA', ] %>% + dplyr::distinct(GEOID, .keep_all = TRUE) %>% + dplyr::filter(stats::complete.cases(.)) + } + if (geo_large == 'csa') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, csa, DEL) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, csa, DEL) %>% + .[.$GEOID != 'NANA', ] %>% + dplyr::distinct(GEOID, .keep_all = TRUE) %>% + dplyr::filter(stats::complete.cases(.)) + } + if (geo_large == 'metro') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, metro, DEL) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, metro, DEL) %>% + .[.$GEOID != 'NANA', ] %>% + dplyr::distinct(GEOID, .keep_all = TRUE) %>% + dplyr::filter(stats::complete.cases(.)) + } + + out <- out %>% + dplyr::arrange(GEOID) %>% + dplyr::as_tibble() + + out_dat <- out_dat %>% + dplyr::arrange(GEOID) %>% + dplyr::as_tibble() + + out <- list(del = out, del_data = out_dat, missing = missingYN) + + return(out) +} diff --git a/R/krieger.R b/R/krieger.R index d9652c5..4dfc1de 100644 --- a/R/krieger.R +++ b/R/krieger.R @@ -1,304 +1,476 @@ -#' Index of Concentration at the Extremes based on Feldman et al. (2015) and Krieger et al. (2016) -#' +#' Index of Concentration at the Extremes based on Feldman et al. (2015) and Krieger et al. (2016) +#' #' Compute the aspatial Index of Concentration at the Extremes (Krieger). #' -#' @param geo Character string specifying the geography of the data either census tracts \code{geo = "tract"} (the default) or counties \code{geo = "county"}. +#' @param geo Character string specifying the geography of the data either census tracts \code{geo = 'tract'} (the default) or counties \code{geo = 'county'}. #' @param year Numeric. The year to compute the estimate. The default is 2020, and the years 2009 onward are currently available. 
#' @param quiet Logical. If TRUE, will display messages about potential missing census information. The default is FALSE. #' @param ... Arguments passed to \code{\link[tidycensus]{get_acs}} to select state, county, and other arguments for census characteristics #' -#' @details This function will compute three aspatial Index of Concentration at the Extremes (ICE) of U.S. census tracts or counties for a specified geographical extent (e.g., entire U.S. or a single state) based on Feldman et al. (2015) \doi{10.1136/jech-2015-205728} and Krieger et al. (2016) \doi{10.2105/AJPH.2015.302955}. The authors expanded the metric designed by Massey in a chapter of Booth & Crouter (2001) \doi{10.4324/9781410600141} who initially designed the metric for residential segregation. This function computes five ICE metrics: -#' -#' \itemize{ -#' \item{Income}{80th income percentile vs. 20th income percentile} -#' \item{Education}{less than high school vs. four-year college degree or more} -#' \item{Race/Ethnicity}{white non-Hispanic vs. black non-Hispanic} -#' \item{Income and race/ethnicity combined}{white non-Hispanic in 80th income percentile vs. black alone (including Hispanic) in 20th income percentile} -#' \item{Income and race/ethnicity combined}{white non-Hispanic in 80th income percentile vs. white non-Hispanic in 20th income percentile} +#' @details This function will compute five aspatial Index of Concentration at the Extremes (\emph{ICE}) metrics of U.S. census tracts or counties for a specified geographical extent (e.g., entire U.S. or a single state) based on Feldman et al. (2015) \doi{10.1136/jech-2015-205728} and Krieger et al. (2016) \doi{10.2105/AJPH.2015.302955}. The authors expanded the metric designed by Massey in a chapter of Booth & Crouter (2001) \doi{10.4324/9781410600141} who initially designed the metric for residential segregation. This function computes five \emph{ICE} metrics: +#' +#' \itemize{ +#' \item \strong{Income}: 80th income percentile vs.
20th income percentile +#' \item \strong{Education}: less than high school vs. four-year college degree or more +#' \item \strong{Race/Ethnicity}: white non-Hispanic vs. black non-Hispanic +#' \item \strong{Income and race/ethnicity combined}: white non-Hispanic in 80th income percentile vs. black alone (including Hispanic) in 20th income percentile +#' \item \strong{Income and race/ethnicity combined}: white non-Hispanic in 80th income percentile vs. white non-Hispanic in 20th income percentile #' } -#' -#' The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the geospatial computation. The yearly estimates are available for 2009 onward when ACS-5 data are available but are available from other U.S. Census Bureau surveys. The ACS-5 groups used in the computation of the five ICE metrics are: +#' +#' The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the geospatial computation. The yearly estimates are available for 2009 onward when ACS-5 data are available but are available from other U.S. Census Bureau surveys. 
The ACS-5 groups used in the computation of the five \emph{ICE} metrics are: #' \itemize{ -#' \item{B03002: }{HISPANIC OR LATINO ORIGIN BY RACE} -#' \item{B15002: }{SEX BY EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER} -#' \item{B19001: }{HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 20XX INFLATION-ADJUSTED DOLLARS)} -#' \item{B19001B: }{HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 20XX INFLATION-ADJUSTED DOLLARS) (BLACK OR AFRICAN AMERICAN ALONE HOUSEHOLDER)} -#' \item{B19001H: }{HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 20XX INFLATION-ADJUSTED DOLLARS) (WHITE ALONE, NOT HISPANIC OR LATINO HOUSEHOLDER)} +#' \item \strong{B03002}: HISPANIC OR LATINO ORIGIN BY RACE +#' \item \strong{B15002}: SEX BY EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER +#' \item \strong{B19001}: HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 20XX INFLATION-ADJUSTED DOLLARS) +#' \item \strong{B19001B}: HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 20XX INFLATION-ADJUSTED DOLLARS) (BLACK OR AFRICAN AMERICAN ALONE HOUSEHOLDER) +#' \item \strong{B19001H}: HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 20XX INFLATION-ADJUSTED DOLLARS) (WHITE ALONE, NOT HISPANIC OR LATINO HOUSEHOLDER) #' } -#' +#' #' Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. -#' -#' ICE metrics can range in value from -1 (most deprived) to 1 (most privileged). A value of 0 can thus represent two possibilities: (1) none of the residents are in the most privileged or most deprived categories, or (2) an equal number of persons are in the most privileged and most deprived categories, and in both cases indicates that the area is not dominated by extreme concentrations of either of the two groups. -#' +#' +#' \emph{ICE} metrics can range in value from -1 (most deprived) to 1 (most privileged). 
A value of 0 can thus represent two possibilities: (1) none of the residents are in the most privileged or most deprived categories, or (2) an equal number of persons are in the most privileged and most deprived categories, and in both cases indicates that the area is not dominated by extreme concentrations of either of the two groups. +#' #' @return An object of class 'list'. This is a named list with the following components: -#' +#' #' \describe{ -#' \item{\code{ice}}{An object of class 'tbl' for the GEOID, name, ICE metrics, and raw census values of specified census geographies.} -#' \item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute the ICEs.} +#' \item{\code{ice}}{An object of class 'tbl' for the GEOID, name, \emph{ICE} metrics, and raw census values of specified census geographies.} +#' \item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute the \emph{ICE} metrics.} #' } -#' +#' #' @import dplyr #' @importFrom stringr str_trim #' @importFrom tidycensus get_acs #' @importFrom tidyr pivot_longer separate #' @export -#' +#' #' @seealso \code{\link[tidycensus]{get_acs}} for additional arguments for geographic extent selection (i.e., \code{state} and \code{county}). #' #' @examples #' \dontrun{ #' # Wrapped in \dontrun{} because these examples require a Census API key. -#' -#' # Tract-level metric (2020) -#' krieger(geo = "tract", state = "GA", year = 2020) -#' -#' # County-level metric (2020) -#' krieger(geo = "county", state = "GA", year = 2020) -#' +#' +#' # Tract-level metrics (2020) +#' krieger(geo = 'tract', state = 'GA', year = 2020) +#' +#' # County-level metrics (2020) +#' krieger(geo = 'county', state = 'GA', year = 2020) +#' #' } -#' -krieger <- function(geo = "tract", year = 2020, quiet = FALSE, ...) 
{ - - # Check arguments - match.arg(geo, choices = c("county", "tract")) - stopifnot(is.numeric(year), year >= 2009) # all variables available 2009 onward - - # Select census variables - vars <- c(TotalPopi = "B19001_001", - TotalPopedu = "B15002_001", - TotalPopre = "B03002_001", - U10i = "B19001_002", - B1015i = "B19001_003", - B1520i = "B19001_004", - B2025i = "B19001_005", - B2530i = "B19001_006", - B100125i = "B19001_014", - B125150i = "B19001_015", - B150200i = "B19001_016", - O200i = "B19001_017", - O25MNSC = "B15002_003", - O25FNSC = "B15002_020", - O25MNt4G = "B15002_004", - O25FNt4G = "B15002_021", - O25M5t6G = "B15002_005", - O25F5t6G = "B15002_022", - O25M7t8G = "B15002_006", - O25F7t8G = "B15002_023", - O25M9G = "B15002_007", - O25F9G = "B15002_024", - O25M10G = "B15002_008", - O25F10G = "B15002_025", - O25M11G = "B15002_009", - O25F11G = "B15002_026", - O25M12GND = "B15002_010", - O25F12GND = "B15002_027", - O25MBD = "B15002_015", - O25FBD = "B15002_032", - O25MMD = "B15002_016", - O25FMD = "B15002_033", - O25MPSD = "B15002_017", - O25FPSD = "B15002_034", - O25MDD = "B15002_018", - O25FDD = "B15002_035", - NHoLW = "B03002_003", - NHoLB = "B03002_004", - U10nhw = "B19001H_002", - B1015nhw = "B19001H_003", - B1520nhw = "B19001H_004", - B2025nhw = "B19001H_005", - B2530nhw = "B19001H_006", - B100125nhw = "B19001H_014", - B125150nhw = "B19001H_015", - B150200nhw = "B19001H_016", - O200nhw = "B19001H_017", - U10bih = "B19001B_002", - B1015bih = "B19001B_003", - B1520bih = "B19001B_004", - B2025bih = "B19001B_005", - B2530bih = "B19001B_006") - - # Acquire ICE variables - ice_vars <- suppressMessages(suppressWarnings(tidycensus::get_acs(geography = geo, - year = year, - output = "wide", - variables = vars, ...))) - - if (geo == "tract") { - ice_vars <- ice_vars %>% - tidyr::separate(NAME, into = c("tract", "county", "state"), sep = ",") %>% - dplyr::mutate(tract = gsub("[^0-9\\.]","", tract)) - } else { - ice_vars <- ice_vars %>% tidyr::separate(NAME, into 
= c("county", "state"), sep = ",") - } - - ice_vars <- ice_vars %>% - dplyr::mutate(TotalPop_inc = TotalPopiE, - TotalPop_edu = TotalPopeduE, - TotalPop_re = TotalPopreE, - U10i = U10iE, - B1015i = B1015iE, - B1520i = B1520iE, - B2025i = B2025iE, - B2530i = B2530iE, - B100125i = B100125iE, - B125150i = B125150iE, - B150200i = B150200iE, - O200i = O200iE, - O25MNSC = O25MNSCE, - O25FNSC = O25FNSCE, - O25MNt4G = O25MNt4GE, - O25FNt4G = O25FNt4GE, - O25M5t6G = O25M5t6GE, - O25F5t6G = O25F5t6GE, - O25M7t8G = O25M7t8GE, - O25F7t8G = O25F7t8GE, - O25M9G = O25M9GE, - O25F9G = O25F9GE, - O25M10G = O25M10GE, - O25F10G = O25F10GE, - O25M11G = O25M11GE, - O25F11G = O25F11GE, - O25M12GND = O25M12GNDE, - O25F12GND = O25F12GNDE, - O25MBD = O25MBDE, - O25FBD = O25FBDE, - O25MMD = O25MMDE, - O25FMD = O25FMDE, - O25MPSD = O25MPSDE, - O25FPSD = O25FPSDE, - O25MDD = O25MDDE, - O25FDD = O25FDDE, - NHoLW = NHoLWE, - NHoLB = NHoLBE, - U10nhw = U10nhwE, - B1015nhw = B1015nhwE, - B1520nhw = B1520nhwE, - B2025nhw = B2025nhwE, - B2530nhw = B2530nhwE, - B100125nhw = B100125nhwE, - B125150nhw = B125150nhwE, - B150200nhw = B150200nhwE, - O200nhw = O200nhwE, - U10bih = U10bihE, - B1015bih = B1015bihE, - B1520bih = B1520bihE, - B2025bih = B2025bihE, - B2530bih = B2530bihE) - - # Sum educational attainment categories - # A_{edu} = Less than high school / 12 year / GED - # P_{edu} = Four-year college degree or more - ice_vars <- ice_vars %>% - dplyr::mutate(A_edu = O25MBD + O25FBD + O25MMD + O25FMD + O25MPSD + - O25FPSD + O25MDD + O25FDD, - P_edu = O25MNSC + O25FNSC + O25MNt4G + O25FNt4G + - O25M5t6G + O25F5t6G + O25M7t8G + O25F7t8G + - O25M9G + O25F9G + O25M10G + O25F10G + - O25M11G + O25F11G + O25M12GND + O25F12GND) - - # Sum income percentile counts - ## A_income (A_{inc}) is the 80th income percentile - ## P_income (P_{inc}) is the 20th income percentile - ## Add "Total, $25,000 to $34,999" for years 2016 and after - ## Remove "Total, $100,000 to $124,999" for years 2016 and after - ## 
According to U.S. Census Bureau Table A-4a - ## "Selected Measures of Household Income Dispersion: 1967 to 2020" - if (year < 2016) { - ice_vars <- ice_vars %>% - dplyr::mutate(A_inc = B100125i + B125150i + B150200i + O200i, - P_inc = U10i + B1015i + B1520i + B2025i, - A_wbinc = B100125nhw + B125150nhw + B150200nhw + O200nhw, - P_wbinc = U10bih + B1015bih + B1520bih + B2025bih, - A_wpcinc = B100125nhw + B125150nhw + B150200nhw + O200nhw, - P_wpcinc = U10nhw + B1015nhw + B1520nhw + B2025nhw) - } else { - ice_vars <- ice_vars %>% - dplyr::mutate(A_inc = B125150i + B150200i + O200i, - P_inc = U10i + B1015i + B1520i + B2025i + B2530i, - A_wbinc = B125150nhw + B150200nhw + O200nhw, - P_wbinc = U10bih + B1015bih + B1520bih + B2025bih + B2530bih, - A_wpcinc = B125150nhw + B150200nhw + O200nhw, - P_wpcinc = U10nhw + B1015nhw + B1520nhw + B2025nhw + B2530nhw) - } - - # Compute ICEs - ## From Kreiger et al. (2016) https://doi.org/10.2105%2FAJPH.2015.302955 - ## ICE_{i} = (A_{i} - P_{i}) / T_{i} - ## Where: - ## A_{i} denotes the count within the lowest extreme (e.g., households in 20th income percentile) - ## P_{i} denotes the count within the highest extreme (e.g., households in 80th income percentile) - ## T_{i} denotes the total population in region i (TotalPop) - - ice_vars <- ice_vars %>% - dplyr::mutate(ICE_inc = (A_inc - P_inc) / TotalPop_inc, - ICE_edu = (A_edu - P_edu) / TotalPop_edu, - ICE_rewb = (NHoLW - NHoLB) / TotalPop_re, - ICE_wbinc = (A_wbinc - P_wbinc) / TotalPop_inc, - ICE_wpcinc = (A_wpcinc - P_wpcinc) / TotalPop_inc) - - # Warning for missingness of census characteristics - missingYN <- ice_vars %>% - dplyr::select(U10i, B1015i, B1520i, B2025i, B2530i, B100125i, B125150i, - B150200i, O200i, O25MNSC, O25FNSC,O25MNt4G, O25FNt4G, - O25M5t6G, O25F5t6G, O25M7t8G, O25F7t8G, O25M9G, O25F9G, - O25M10G, O25F10G, O25M11G, O25F11G, O25M12GND, O25F12GND, - O25MBD, O25FBD, O25MMD, O25FMD, O25MPSD, O25FPSD, O25MDD, - O25FDD, NHoLW, NHoLB, U10nhw, B1015nhw, B1520nhw, - 
B2025nhw, B2530nhw, B100125nhw, B125150nhw, - B150200nhw, O200nhw, U10bih, B1015bih, B1520bih, B2025bih, - B2530bih, TotalPop_inc, TotalPop_edu, TotalPop_re) %>% - tidyr::pivot_longer(cols = dplyr::everything(), - names_to = "variable", - values_to = "val") %>% - dplyr::group_by(variable) %>% - dplyr::summarise(total = dplyr::n(), - n_missing = sum(is.na(val)), - percent_missing = paste0(round(mean(is.na(val)) * 100, 2), " %")) +#' +krieger <- function(geo = 'tract', + year = 2020, + quiet = FALSE, + ...) { - if (quiet == FALSE) { - # Warning for missing census data - if (sum(missingYN$n_missing) > 0) { - message("Warning: Missing census data") + # Check arguments + match.arg(geo, choices = c('county', 'tract')) + stopifnot(is.numeric(year), year >= 2009) # all variables available 2009 onward + + # Select census variables + vars <- c( + TotalPopi = 'B19001_001', + TotalPopedu = 'B15002_001', + TotalPopre = 'B03002_001', + U10i = 'B19001_002', + B1015i = 'B19001_003', + B1520i = 'B19001_004', + B2025i = 'B19001_005', + B2530i = 'B19001_006', + B100125i = 'B19001_014', + B125150i = 'B19001_015', + B150200i = 'B19001_016', + O200i = 'B19001_017', + O25MNSC = 'B15002_003', + O25FNSC = 'B15002_020', + O25MNt4G = 'B15002_004', + O25FNt4G = 'B15002_021', + O25M5t6G = 'B15002_005', + O25F5t6G = 'B15002_022', + O25M7t8G = 'B15002_006', + O25F7t8G = 'B15002_023', + O25M9G = 'B15002_007', + O25F9G = 'B15002_024', + O25M10G = 'B15002_008', + O25F10G = 'B15002_025', + O25M11G = 'B15002_009', + O25F11G = 'B15002_026', + O25M12GND = 'B15002_010', + O25F12GND = 'B15002_027', + O25MBD = 'B15002_015', + O25FBD = 'B15002_032', + O25MMD = 'B15002_016', + O25FMD = 'B15002_033', + O25MPSD = 'B15002_017', + O25FPSD = 'B15002_034', + O25MDD = 'B15002_018', + O25FDD = 'B15002_035', + NHoLW = 'B03002_003', + NHoLB = 'B03002_004', + U10nhw = 'B19001H_002', + B1015nhw = 'B19001H_003', + B1520nhw = 'B19001H_004', + B2025nhw = 'B19001H_005', + B2530nhw = 'B19001H_006', + B100125nhw = 
'B19001H_014', + B125150nhw = 'B19001H_015', + B150200nhw = 'B19001H_016', + O200nhw = 'B19001H_017', + U10bih = 'B19001B_002', + B1015bih = 'B19001B_003', + B1520bih = 'B19001B_004', + B2025bih = 'B19001B_005', + B2530bih = 'B19001B_006' + ) + + # Acquire ICE variables + out_dat <- suppressMessages(suppressWarnings( + tidycensus::get_acs( + geography = geo, + year = year, + output = 'wide', + variables = vars, + ... + ) + )) + + if (geo == 'tract') { + out_dat <- out_dat %>% + tidyr::separate(NAME, into = c('tract', 'county', 'state'), sep = ',') %>% + dplyr::mutate(tract = gsub('[^0-9\\.]', '', tract)) + } else { + out_dat <- out_dat %>% + tidyr::separate(NAME, into = c('county', 'state'), sep = ',') } + + out_dat <- out_dat %>% + dplyr::mutate( + TotalPop_inc = TotalPopiE, + TotalPop_edu = TotalPopeduE, + TotalPop_re = TotalPopreE, + U10i = U10iE, + B1015i = B1015iE, + B1520i = B1520iE, + B2025i = B2025iE, + B2530i = B2530iE, + B100125i = B100125iE, + B125150i = B125150iE, + B150200i = B150200iE, + O200i = O200iE, + O25MNSC = O25MNSCE, + O25FNSC = O25FNSCE, + O25MNt4G = O25MNt4GE, + O25FNt4G = O25FNt4GE, + O25M5t6G = O25M5t6GE, + O25F5t6G = O25F5t6GE, + O25M7t8G = O25M7t8GE, + O25F7t8G = O25F7t8GE, + O25M9G = O25M9GE, + O25F9G = O25F9GE, + O25M10G = O25M10GE, + O25F10G = O25F10GE, + O25M11G = O25M11GE, + O25F11G = O25F11GE, + O25M12GND = O25M12GNDE, + O25F12GND = O25F12GNDE, + O25MBD = O25MBDE, + O25FBD = O25FBDE, + O25MMD = O25MMDE, + O25FMD = O25FMDE, + O25MPSD = O25MPSDE, + O25FPSD = O25FPSDE, + O25MDD = O25MDDE, + O25FDD = O25FDDE, + NHoLW = NHoLWE, + NHoLB = NHoLBE, + U10nhw = U10nhwE, + B1015nhw = B1015nhwE, + B1520nhw = B1520nhwE, + B2025nhw = B2025nhwE, + B2530nhw = B2530nhwE, + B100125nhw = B100125nhwE, + B125150nhw = B125150nhwE, + B150200nhw = B150200nhwE, + O200nhw = O200nhwE, + U10bih = U10bihE, + B1015bih = B1015bihE, + B1520bih = B1520bihE, + B2025bih = B2025bihE, + B2530bih = B2530bihE + ) + + # Sum educational attainment categories + # A_{edu} = 
Four-year college degree or more + # P_{edu} = Less than high school / 12 year / GED + out_dat <- out_dat %>% + dplyr::mutate( + A_edu = O25MBD + O25FBD + O25MMD + O25FMD + O25MPSD + O25FPSD + O25MDD + O25FDD, + P_edu = O25MNSC + O25FNSC + O25MNt4G + O25FNt4G + O25M5t6G + O25F5t6G + O25M7t8G + + O25F7t8G + O25M9G + O25F9G + O25M10G + O25F10G + O25M11G + O25F11G + O25M12GND + + O25F12GND + ) + + # Sum income percentile counts + ## A_income (A_{inc}) is the 80th income percentile + ## P_income (P_{inc}) is the 20th income percentile + ## Add 'Total, $25,000 to $34,999' for years 2016 and after + ## Remove 'Total, $100,000 to $124,999' for years 2016 and after + ## According to U.S. Census Bureau Table A-4a + ## 'Selected Measures of Household Income Dispersion: 1967 to 2020' + if (year < 2016) { + out_dat <- out_dat %>% + dplyr::mutate( + A_inc = B100125i + B125150i + B150200i + O200i, + P_inc = U10i + B1015i + B1520i + B2025i, + A_wbinc = B100125nhw + B125150nhw + B150200nhw + O200nhw, + P_wbinc = U10bih + B1015bih + B1520bih + B2025bih, + A_wpcinc = B100125nhw + B125150nhw + B150200nhw + O200nhw, + P_wpcinc = U10nhw + B1015nhw + B1520nhw + B2025nhw + ) + } else { + out_dat <- out_dat %>% + dplyr::mutate( + A_inc = B125150i + B150200i + O200i, + P_inc = U10i + B1015i + B1520i + B2025i + B2530i, + A_wbinc = B125150nhw + B150200nhw + O200nhw, + P_wbinc = U10bih + B1015bih + B1520bih + B2025bih + B2530bih, + A_wpcinc = B125150nhw + B150200nhw + O200nhw, + P_wpcinc = U10nhw + B1015nhw + B1520nhw + B2025nhw + B2530nhw + ) + } + + # Compute ICEs + ## From Krieger et al.
(2016) https://doi.org/10.2105%2FAJPH.2015.302955 + ## ICE_{i} = (A_{i} - P_{i}) / T_{i} + ## Where: + ## A_{i} denotes the count within the highest extreme (e.g., households in 80th income percentile) + ## P_{i} denotes the count within the lowest extreme (e.g., households in 20th income percentile) + ## T_{i} denotes the total population in region i (TotalPop) + + out_dat <- out_dat %>% + dplyr::mutate( + ICE_inc = (A_inc - P_inc) / TotalPop_inc, + ICE_edu = (A_edu - P_edu) / TotalPop_edu, + ICE_rewb = (NHoLW - NHoLB) / TotalPop_re, + ICE_wbinc = (A_wbinc - P_wbinc) / TotalPop_inc, + ICE_wpcinc = (A_wpcinc - P_wpcinc) / TotalPop_inc + ) + + # Warning for missingness of census characteristics + missingYN <- out_dat %>% + dplyr::select( + U10i, + B1015i, + B1520i, + B2025i, + B2530i, + B100125i, + B125150i, + B150200i, + O200i, + O25MNSC, + O25FNSC, + O25MNt4G, + O25FNt4G, + O25M5t6G, + O25F5t6G, + O25M7t8G, + O25F7t8G, + O25M9G, + O25F9G, + O25M10G, + O25F10G, + O25M11G, + O25F11G, + O25M12GND, + O25F12GND, + O25MBD, + O25FBD, + O25MMD, + O25FMD, + O25MPSD, + O25FPSD, + O25MDD, + O25FDD, + NHoLW, + NHoLB, + U10nhw, + B1015nhw, + B1520nhw, + B2025nhw, + B2530nhw, + B100125nhw, + B125150nhw, + B150200nhw, + O200nhw, + U10bih, + B1015bih, + B1520bih, + B2025bih, + B2530bih, + TotalPop_inc, + TotalPop_edu, + TotalPop_re + ) %>% + tidyr::pivot_longer( + cols = dplyr::everything(), + names_to = 'variable', + values_to = 'val' + ) %>% + dplyr::group_by(variable) %>% + dplyr::summarise( + total = dplyr::n(), + n_missing = sum(is.na(val)), + percent_missing = paste0(round(mean(is.na(val)) * 100, 2), ' %') + ) + + if (quiet == FALSE) { + # Warning for missing census data + if (sum(missingYN$n_missing) > 0) { + message('Warning: Missing census data') + } + } + + # Format output + if (geo == 'tract') { + out <- out_dat %>% + dplyr::select( + GEOID, + state, + county, + tract, + ICE_inc, + ICE_edu, + ICE_rewb, + ICE_wbinc, + ICE_wpcinc, + U10i, + B1015i, + B1520i, + B2025i, +
B2530i, + B100125i, + B125150i, + B150200i, + O200i, + O25MNSC, + O25FNSC, + O25MNt4G, + O25FNt4G, + O25M5t6G, + O25F5t6G, + O25M7t8G, + O25F7t8G, + O25M9G, + O25F9G, + O25M10G, + O25F10G, + O25M11G, + O25F11G, + O25M12GND, + O25F12GND, + O25MBD, + O25FBD, + O25MMD, + O25FMD, + O25MPSD, + O25FPSD, + O25MDD, + O25FDD, + NHoLW, + NHoLB, + U10nhw, + B1015nhw, + B1520nhw, + B2025nhw, + B2530nhw, + B100125nhw, + B125150nhw, + B150200nhw, + O200nhw, + U10bih, + B1015bih, + B1520bih, + B2025bih, + B2530bih, + TotalPop_inc, + TotalPop_edu, + TotalPop_re + ) + } else { + out <- out_dat %>% + dplyr::select( + GEOID, + state, + county, + ICE_inc, + ICE_edu, + ICE_rewb, + ICE_wbinc, + ICE_wpcinc, + U10i, + B1015i, + B1520i, + B2025i, + B2530i, + B100125i, + B125150i, + B150200i, + O200i, + O25MNSC, + O25FNSC, + O25MNt4G, + O25FNt4G, + O25M5t6G, + O25F5t6G, + O25M7t8G, + O25F7t8G, + O25M9G, + O25F9G, + O25M10G, + O25F10G, + O25M11G, + O25F11G, + O25M12GND, + O25F12GND, + O25MBD, + O25FBD, + O25MMD, + O25FMD, + O25MPSD, + O25FPSD, + O25MDD, + O25FDD, + NHoLW, + NHoLB, + U10nhw, + B1015nhw, + B1520nhw, + B2025nhw, + B2530nhw, + B100125nhw, + B125150nhw, + B150200nhw, + O200nhw, + U10bih, + B1015bih, + B1520bih, + B2025bih, + B2530bih, + TotalPop_inc, + TotalPop_edu, + TotalPop_re + ) + } + + out <- out %>% + dplyr::mutate( + state = stringr::str_trim(state), + county = stringr::str_trim(county) + ) %>% + dplyr::arrange(GEOID) %>% + dplyr::as_tibble() + + out <- list(ice = out, missing = missingYN) + + return(out) } - - # Format output - if (geo == "tract") { - ice <- ice_vars %>% - dplyr::select(GEOID, state, county, tract, - ICE_inc, ICE_edu, ICE_rewb, ICE_wbinc, ICE_wpcinc, - U10i, B1015i, B1520i, B2025i, B2530i, B100125i, B125150i, - B150200i, O200i, O25MNSC, O25FNSC,O25MNt4G, O25FNt4G, - O25M5t6G, O25F5t6G, O25M7t8G, O25F7t8G, O25M9G, O25F9G, - O25M10G, O25F10G, O25M11G, O25F11G, O25M12GND, O25F12GND, - O25MBD, O25FBD, O25MMD, O25FMD, O25MPSD, O25FPSD, O25MDD, - O25FDD, 
NHoLW, NHoLB, U10nhw, B1015nhw, B1520nhw, - B2025nhw, B2530nhw, B100125nhw, B125150nhw, - B150200nhw, O200nhw, U10bih, B1015bih, B1520bih, B2025bih, - B2530bih, TotalPop_inc, TotalPop_edu, TotalPop_re) - } else { - ice <- ice_vars %>% - dplyr::select(GEOID, state, county, - ICE_inc, ICE_edu, ICE_rewb, ICE_wbinc, ICE_wpcinc, - U10i, B1015i, B1520i, B2025i, B2530i, B100125i, B125150i, - B150200i, O200i, O25MNSC, O25FNSC,O25MNt4G, O25FNt4G, - O25M5t6G, O25F5t6G, O25M7t8G, O25F7t8G, O25M9G, O25F9G, - O25M10G, O25F10G, O25M11G, O25F11G, O25M12GND, O25F12GND, - O25MBD, O25FBD, O25MMD, O25FMD, O25MPSD, O25FPSD, O25MDD, - O25FDD, NHoLW, NHoLB, U10nhw, B1015nhw, B1520nhw, - B2025nhw, B2530nhw, B100125nhw, B125150nhw, - B150200nhw, O200nhw, U10bih, B1015bih, B1520bih, B2025bih, - B2530bih, TotalPop_inc, TotalPop_edu, TotalPop_re) - } - - ice <- ice %>% - dplyr::mutate(state = stringr::str_trim(state), - county = stringr::str_trim(county)) %>% - dplyr::arrange(GEOID) %>% - dplyr::as_tibble() - - out <- list(ice = ice, - missing = missingYN) - - return(out) -} diff --git a/R/lieberson.R b/R/lieberson.R new file mode 100644 index 0000000..07c13ec --- /dev/null +++ b/R/lieberson.R @@ -0,0 +1,390 @@ +#' Isolation Index based on Lieberson (1981) and Bell (1954) +#' +#' Compute the aspatial Isolation Index (Lieberson) of a selected racial/ethnic subgroup(s) and U.S. geographies. +#' +#' @param geo_large Character string specifying the larger geographical unit of the data. The default is counties \code{geo_large = 'county'}. +#' @param geo_small Character string specifying the smaller geographical unit of the data. The default is census tracts \code{geo_large = 'tract'}. +#' @param year Numeric. The year to compute the estimate. The default is 2020, and the years 2009 onward are currently available. +#' @param subgroup Character string specifying the racial/ethnic subgroup(s). See Details for available choices. +#' @param omit_NAs Logical. 
If FALSE, will compute index for a larger geographical unit only if all of its smaller geographical units have values. The default is TRUE. +#' @param quiet Logical. If TRUE, will display messages about potential missing census information. The default is FALSE. +#' @param ... Arguments passed to \code{\link[tidycensus]{get_acs}} to select state, county, and other arguments for census characteristics +#' +#' @details This function will compute the aspatial Isolation Index (_xPx\*_) of selected racial/ethnic subgroups and U.S. geographies for a specified geographical extent (e.g., the entire U.S. or a single state) based on Lieberson (1981; ISBN-13:978-1-032-53884-6) and Bell (1954) \doi{10.2307/2574118}. This function provides the computation of _xPx\*_ for any of the U.S. Census Bureau race/ethnicity subgroups (including Hispanic and non-Hispanic individuals). +#' +#' The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the aspatial computation. The yearly estimates are available for 2009 onward when ACS-5 data are available (2010 onward for \code{geo_large = 'cbsa'} and 2011 onward for \code{geo_large = 'csa'} or \code{geo_large = 'metro'}) but may be available from other U.S. Census Bureau surveys. The twenty racial/ethnic subgroups (U.S. 
Census Bureau definitions) are: +#' \itemize{ +#' \item \strong{B03002_002}: not Hispanic or Latino \code{'NHoL'} +#' \item \strong{B03002_003}: not Hispanic or Latino, white alone \code{'NHoLW'} +#' \item \strong{B03002_004}: not Hispanic or Latino, Black or African American alone \code{'NHoLB'} +#' \item \strong{B03002_005}: not Hispanic or Latino, American Indian and Alaska Native alone \code{'NHoLAIAN'} +#' \item \strong{B03002_006}: not Hispanic or Latino, Asian alone \code{'NHoLA'} +#' \item \strong{B03002_007}: not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'NHoLNHOPI'} +#' \item \strong{B03002_008}: not Hispanic or Latino, Some other race alone \code{'NHoLSOR'} +#' \item \strong{B03002_009}: not Hispanic or Latino, Two or more races \code{'NHoLTOMR'} +#' \item \strong{B03002_010}: not Hispanic or Latino, Two races including Some other race \code{'NHoLTRiSOR'} +#' \item \strong{B03002_011}: not Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'NHoLTReSOR'} +#' \item \strong{B03002_012}: Hispanic or Latino \code{'HoL'} +#' \item \strong{B03002_013}: Hispanic or Latino, white alone \code{'HoLW'} +#' \item \strong{B03002_014}: Hispanic or Latino, Black or African American alone \code{'HoLB'} +#' \item \strong{B03002_015}: Hispanic or Latino, American Indian and Alaska Native alone \code{'HoLAIAN'} +#' \item \strong{B03002_016}: Hispanic or Latino, Asian alone \code{'HoLA'} +#' \item \strong{B03002_017}: Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'HoLNHOPI'} +#' \item \strong{B03002_018}: Hispanic or Latino, Some other race alone \code{'HoLSOR'} +#' \item \strong{B03002_019}: Hispanic or Latino, Two or more races \code{'HoLTOMR'} +#' \item \strong{B03002_020}: Hispanic or Latino, Two races including Some other race \code{'HoLTRiSOR'} +#' \item \strong{B03002_021}: Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'HoLTReSOR'} 
+#' } +#' +#' Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. +#' +#' _xPx\*_ is some measure of the probability that a member of a subgroup(s) will meet or interact with another member of the same subgroup(s), with higher values signifying a higher probability of within-group interaction (more isolation). _xPx\*_ can range in value from 0 to 1. +#' +#' Larger geographies available include state \code{geo_large = 'state'}, county \code{geo_large = 'county'}, census tract \code{geo_large = 'tract'}, Core Based Statistical Area \code{geo_large = 'cbsa'}, Combined Statistical Area \code{geo_large = 'csa'}, and Metropolitan Division \code{geo_large = 'metro'} levels. Smaller geographies available include county \code{geo_small = 'county'}, census tract \code{geo_small = 'tract'}, and census block group \code{geo_small = 'block group'} levels. If a larger geographical area is comprised of only one smaller geographical area (e.g., a U.S. county contains only one census tract), then the _xPx\*_ value returned is NA. If the larger geographical unit is Combined Statistical Areas \code{geo_large = 'csa'}, Core Based Statistical Areas \code{geo_large = 'cbsa'}, or Metropolitan Divisions \code{geo_large = 'metro'}, only the smaller geographical units completely within a larger geographical unit are considered in the _xPx\*_ computation (see internal \code{\link[sf]{st_within}} function for more information); we recommend specifying all states within which the larger geographical unit of interest is located using the internal \code{state} argument to ensure all appropriate smaller geographical units are included in the _xPx\*_ computation. +#' +#' @return An object of class 'list'.
This is a named list with the following components: +#' +#' \describe{ +#' \item{\code{xpx_star}}{An object of class 'tbl' for the GEOID, name, and _xPx\*_ at specified larger census geographies.} +#' \item{\code{xpx_star_data}}{An object of class 'tbl' for the raw census values at specified smaller census geographies.} +#' \item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute _xPx\*_.} +#' } +#' +#' @import dplyr +#' @importFrom sf st_drop_geometry st_within +#' @importFrom stats complete.cases +#' @importFrom tidycensus get_acs +#' @importFrom tidyr pivot_longer separate +#' @importFrom tigris combined_statistical_areas core_based_statistical_areas metro_divisions +#' @importFrom utils stack +#' @export +#' +#' @seealso \code{\link[tidycensus]{get_acs}} for additional arguments for geographic extent selection (i.e., \code{state} and \code{county}). +#' +#' @examples +#' \dontrun{ +#' # Wrapped in \dontrun{} because these examples require a Census API key. +#' +#' # Isolation of non-Hispanic Black populations +#' ## of census tracts within Georgia, U.S.A., counties (2020) +#' lieberson( +#' geo_large = 'county', +#' geo_small = 'tract', +#' state = 'GA', +#' year = 2020, +#' subgroup = 'NHoLB' +#' ) +#' +#' } +#' +lieberson <- function(geo_large = 'county', + geo_small = 'tract', + year = 2020, + subgroup, + omit_NAs = TRUE, + quiet = FALSE, + ...)
{ + + # Check arguments + match.arg(geo_large, choices = c('state', 'county', 'tract', 'cbsa', 'csa', 'metro')) + match.arg(geo_small, choices = c('county', 'tract', 'block group')) + stopifnot(is.numeric(year), year >= 2009) # all variables available 2009 onward + match.arg( + subgroup, + several.ok = TRUE, + choices = c( + 'NHoL', + 'NHoLW', + 'NHoLB', + 'NHoLAIAN', + 'NHoLA', + 'NHoLNHOPI', + 'NHoLSOR', + 'NHoLTOMR', + 'NHoLTRiSOR', + 'NHoLTReSOR', + 'HoL', + 'HoLW', + 'HoLB', + 'HoLAIAN', + 'HoLA', + 'HoLNHOPI', + 'HoLSOR', + 'HoLTOMR', + 'HoLTRiSOR', + 'HoLTReSOR' + ) + ) + + # Select census variables + vars <- c( + TotalPop = 'B03002_001', + NHoL = 'B03002_002', + NHoLW = 'B03002_003', + NHoLB = 'B03002_004', + NHoLAIAN = 'B03002_005', + NHoLA = 'B03002_006', + NHoLNHOPI = 'B03002_007', + NHoLSOR = 'B03002_008', + NHoLTOMR = 'B03002_009', + NHoLTRiSOR = 'B03002_010', + NHoLTReSOR = 'B03002_011', + HoL = 'B03002_012', + HoLW = 'B03002_013', + HoLB = 'B03002_014', + HoLAIAN = 'B03002_015', + HoLA = 'B03002_016', + HoLNHOPI = 'B03002_017', + HoLSOR = 'B03002_018', + HoLTOMR = 'B03002_019', + HoLTRiSOR = 'B03002_020', + HoLTReSOR = 'B03002_021' + ) + + selected_vars <- vars[c('TotalPop', subgroup)] + out_names <- names(selected_vars) # save for output + in_subgroup <- paste0(subgroup, 'E') + + # Acquire xPx* variables and sf geometries + out_dat <- suppressMessages(suppressWarnings( + tidycensus::get_acs( + geography = geo_small, + year = year, + output = 'wide', + variables = selected_vars, + geometry = TRUE, + keep_geo_vars = TRUE, + ... 
+ ) + )) + + # Format output + if (geo_small == 'county') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('county', 'state'), sep = ',') + } + if (geo_small == 'tract') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('tract', 'county', 'state'), sep = ',') %>% + dplyr::mutate(tract = gsub('[^0-9\\.]', '', tract)) + } + if (geo_small == 'block group') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('block.group', 'tract', 'county', 'state'), sep = ',') %>% + dplyr::mutate( + tract = gsub('[^0-9\\.]', '', tract), + block.group = gsub('[^0-9\\.]', '', block.group) + ) + } + + # Grouping IDs for xPx* computation + if (geo_large == 'state') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = STATEFP, + state = stringr::str_trim(state) + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'county') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = paste0(STATEFP, COUNTYFP), + state = stringr::str_trim(state), + county = stringr::str_trim(county) + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'tract') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = paste0(STATEFP, COUNTYFP, TRACTCE), + state = stringr::str_trim(state), + county = stringr::str_trim(county) + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'cbsa') { + stopifnot(is.numeric(year), year >= 2010) # CBSAs only available 2010 onward + lgeom <- suppressMessages(suppressWarnings(tigris::core_based_statistical_areas(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 3] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + cbsa = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 4] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'csa') { + stopifnot(is.numeric(year), year >= 2011) # 
CSAs only available 2011 onward + lgeom <- suppressMessages(suppressWarnings(tigris::combined_statistical_areas(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 2] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + csa = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 3] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'metro') { + stopifnot(is.numeric(year), year >= 2011) # Metro Divisions only available 2011 onward + lgeom <- suppressMessages(suppressWarnings(tigris::metro_divisions(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 4] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + metro = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 5] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) %>% + sf::st_drop_geometry() + } + + # Count of racial/ethnic subgroup populations + ## Count of racial/ethnic comparison subgroup population + if (length(in_subgroup) == 1) { + out_dat <- out_dat %>% + dplyr::mutate(subgroup = .[, in_subgroup]) + } else { + out_dat <- out_dat %>% + dplyr::mutate(subgroup = rowSums(.[, in_subgroup])) + } + + # Compute xPx* + ## From Lieberson (1981) in ISBN-13:978-1-032-53884-6 + ## _{x}P_{x}^* = \sum_{i=1}^{k} \left ( \frac{x_{i}}{X}\right )\left ( \frac{x_{i}}{n_{i}}\right ) + ## Where for k geographical units i: + ## X denotes the total number of subgroup population in study (reference) area + ## x_{i} denotes the number of subgroup population X in geographical unit i + ## n_{i} denotes the total population of geographical unit i + + ## Compute 
+ out_tmp <- out_dat %>% + split(., f = list(out_dat$oid)) %>% + lapply(., FUN = xpx_star_fun, omit_NAs = omit_NAs) %>% + utils::stack(.) %>% + dplyr::mutate( + xPx_star = values, + oid = ind + ) %>% + dplyr::select(xPx_star, oid) + + # Warning for missingness of census characteristics + missingYN <- out_dat[, c('TotalPopE', in_subgroup)] + names(missingYN) <- out_names + missingYN <- missingYN %>% + tidyr::pivot_longer( + cols = dplyr::everything(), + names_to = 'variable', + values_to = 'val' + ) %>% + dplyr::group_by(variable) %>% + dplyr::summarise( + total = dplyr::n(), + n_missing = sum(is.na(val)), + percent_missing = paste0(round(mean(is.na(val)) * 100, 2), ' %') + ) + + if (quiet == FALSE) { + # Warning for missing census data + if (sum(missingYN$n_missing) > 0) { + message('Warning: Missing census data') + } + } + + # Format output + if (geo_large == 'state') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, state, xPx_star) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, state, xPx_star) %>% + .[.$GEOID != 'NANA',] + } + if (geo_large == 'county') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, state, county, xPx_star) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, state, county, xPx_star) %>% + .[.$GEOID != 'NANA',] + } + if (geo_large == 'tract') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, state, county, tract, xPx_star) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, state, county, tract, xPx_star) %>% + .[.$GEOID != 'NANA',] + } + if (geo_large == 'cbsa') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, cbsa, xPx_star) %>% + unique(.) 
%>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, cbsa, xPx_star) %>% + .[.$GEOID != 'NANA', ] %>% + dplyr::distinct(GEOID, .keep_all = TRUE) %>% + dplyr::filter(stats::complete.cases(.)) + } + if (geo_large == 'csa') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, csa, xPx_star) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, csa, xPx_star) %>% + .[.$GEOID != 'NANA', ] %>% + dplyr::distinct(GEOID, .keep_all = TRUE) %>% + dplyr::filter(stats::complete.cases(.)) + } + if (geo_large == 'metro') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, metro, xPx_star) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, metro, xPx_star) %>% + .[.$GEOID != 'NANA', ] %>% + dplyr::distinct(GEOID, .keep_all = TRUE) %>% + dplyr::filter(stats::complete.cases(.)) + } + + out <- out %>% + dplyr::arrange(GEOID) %>% + dplyr::as_tibble() + + out_dat <- out_dat %>% + dplyr::arrange(GEOID) %>% + dplyr::as_tibble() + + out <- list(xpx_star = out, xpx_star_data = out_dat, missing = missingYN) + + return(out) +} diff --git a/R/messer.R b/R/messer.R index 32cf1ef..f8d62ce 100644 --- a/R/messer.R +++ b/R/messer.R @@ -2,32 +2,32 @@ #' #' Compute the aspatial Neighborhood Deprivation Index (Messer). #' -#' @param geo Character string specifying the geography of the data either census tracts \code{geo = "tract"} (the default) or counties \code{geo = "county"}. +#' @param geo Character string specifying the geography of the data either census tracts \code{geo = 'tract'} (the default) or counties \code{geo = 'county'}. #' @param year Numeric. The year to compute the estimate. The default is 2020, and the years 2010 onward are currently available. #' @param imp Logical. If TRUE, will impute missing census characteristics within the internal \code{\link[psych]{principal}}. If FALSE (the default), will not impute. 
#' @param quiet Logical. If TRUE, will display messages about potential missing census information and the proportion of variance explained by principal component analysis. The default is FALSE. -#' @param round_output Logical. If TRUE, will round the output of raw census and NDI values from the \code{\link[tidycensus]{get_acs}} at one and four significant digits, respectively. The default is FALSE. +#' @param round_output Logical. If TRUE, will round the output of raw census and \emph{NDI} values from the \code{\link[tidycensus]{get_acs}} at one and four significant digits, respectively. The default is FALSE. #' @param df Optional. Pass a pre-formatted \code{'dataframe'} or \code{'tibble'} with the desired variables through the function. Bypasses the data obtained by \code{\link[tidycensus]{get_acs}}. The default is NULL. See Details below. #' @param ... Arguments passed to \code{\link[tidycensus]{get_acs}} to select state, county, and other arguments for census characteristics #' -#' @details This function will compute the aspatial Neighborhood Deprivation Index (NDI) of U.S. census tracts or counties for a specified geographical referent (e.g., US-standardized) based on Messer et al. (2006) \doi{10.1007/s11524-006-9094-x}. +#' @details This function will compute the aspatial Neighborhood Deprivation Index (\emph{NDI}) of U.S. census tracts or counties for a specified geographical referent (e.g., US-standardized) based on Messer et al. (2006) \doi{10.1007/s11524-006-9094-x}. #' #' The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for computation involving a principal component analysis with the \code{\link[psych]{principal}} function. The yearly estimates are available for 2010 and after when all census characteristics became available. 
The eight characteristics are: #' \itemize{ -#' \item{C24030: }{percent males in management, science, and arts occupation} -#' \item{B25014: }{percent of crowded housing} -#' \item{B17017: }{percent of households in poverty} -#' \item{B25115: }{percent of female headed households with dependents} -#' \item{B19058: }{percent of households on public assistance} -#' \item{B19001: }{percent of households earning <$30,000 per year} -#' \item{B06009: }{percent earning less than a high school education} -#' \item{B23025: }{percent unemployed (2011 onward)} -#' \item{B23001: }{percent unemployed (2010 only)} +#' \item \strong{OCC (C24030)}: percent males in management, science, and arts occupation +#' \item \strong{CWD (B25014)}: percent of crowded housing +#' \item \strong{POV (B17017)}: percent of households in poverty +#' \item \strong{FHH (B25115)}: percent of female headed households with dependents +#' \item \strong{PUB (B19058)}: percent of households on public assistance +#' \item \strong{U30 (B19001)}: percent of households earning <$30,000 per year +#' \item \strong{EDU (B06009)}: percent earning less than a high school education +#' \item \strong{EMP (B23025)}: percent unemployed (2011 onward) +#' \item \strong{EMP (B23001)}: percent unemployed (2010 only) #' } #' -#' Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify the referent for standardizing the NDI (Messer) values. For example, if all U.S. states are specified for the \code{state} argument, then the output would be a U.S.-standardized index. +#' Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify the referent for standardizing the \emph{NDI} (Messer) values. For example, if all U.S. states are specified for the \code{state} argument, then the output would be a U.S.-standardized index. 
#' -#' The continuous NDI (Messer) values are z-transformed, i.e., "standardized," and the categorical NDI (Messer) values are quartiles of the standardized continuous NDI (Messer) values. +#' The continuous \emph{NDI} (Messer) values are z-transformed, i.e., 'standardized,' and the categorical \emph{NDI} (Messer) values are quartiles of the standardized continuous \emph{NDI} (Messer) values. #' #' Check if the proportion of variance explained by the first principal component is high (more than 0.5). #' @@ -36,9 +36,9 @@ #' @return An object of class 'list'. This is a named list with the following components: #' #' \describe{ -#' \item{\code{ndi}}{An object of class 'tbl' for the GEOID, name, NDI (standardized), NDI (quartile), and raw census values of specified census geographies.} -#' \item{\code{pca}}{An object of class 'principal', returns the output of \code{\link[psych]{principal}} used to compute the NDI values.} -#' \item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute NDI.} +#' \item{\code{ndi}}{An object of class 'tbl' for the GEOID, name, \emph{NDI} (standardized), \emph{NDI} (quartile), and raw census values of specified census geographies.} +#' \item{\code{pca}}{An object of class 'principal', returns the output of \code{\link[psych]{principal}} used to compute the \emph{NDI} values.} +#' \item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute \emph{NDI}.} #' } #' #' @import dplyr @@ -58,233 +58,331 @@ #' \dontrun{ #' # Wrapped in \dontrun{} because these examples require a Census API key. 
#' -#' # Tract-level metric (2020) -#' messer(geo = "tract", state = "GA", year = 2020) +#' # Tract-level NDI (Messer; 2020) +#' messer(geo = 'tract', state = 'GA', year = 2020) #' -#' # Impute NDI for tracts (2020) with missing census information (median values) -#' messer(state = "tract", "GA", year = 2020, imp = TRUE) -#' -#' # County-level metric (2020) -#' messer(geo = "county", state = "GA", year = 2020) +#' # Impute NDI (Messer; 2020) for tracts with missing census information (median values) +#' messer(geo = 'tract', state = 'GA', year = 2020, imp = TRUE) #' #' } #' -messer <- function(geo = "tract", year = 2020, imp = FALSE, quiet = FALSE, round_output = FALSE, df = NULL, ...) { +messer <- function(geo = 'tract', + year = 2020, + imp = FALSE, + quiet = FALSE, + round_output = FALSE, + df = NULL, + ...) { # Check arguments - if (!is.null(df) & !inherits(df, c("tbl_df", "tbl", "data.frame"))) { stop("'df' must be class 'data.frame' or 'tbl'") } + if (!is.null(df) & + !inherits(df, c('tbl_df', 'tbl', 'data.frame'))) { + stop("df' must be class 'data.frame' or 'tbl'") + } if (is.null(df)) { - # Check additional arguments - match.arg(geo, choices = c("county", "tract")) + match.arg(geo, choices = c('county', 'tract')) stopifnot(is.numeric(year), year >= 2010) # all variables available 2010 onward # Select census variables - vars <- c(PctMenMgmtBusScArti_num1 = "C24030_018", PctMenMgmtBusScArti_num2 = "C24030_019", - PctMenMgmtBusScArti_den = "C24030_002", - PctCrwdHH_num1 = "B25014_005", PctCrwdHH_num2 = "B25014_006", - PctCrwdHH_num3 = "B25014_007", PctCrwdHH_num4 = "B25014_011", - PctCrwdHH_num5 = "B25014_012", PctCrwdHH_num6 = "B25014_013", - PctCrwdHH_den = "B25014_001", - PctHHPov_num = "B17017_002", PctHHPov_den = "B17017_001", - PctFemHeadKids_num1 = "B25115_012", PctFemHeadKids_num2 = "B25115_025", - PctFemHeadKids_den = "B25115_001", - PctPubAsst_num = "B19058_002", PctPubAsst_den = "B19058_001", - PctHHUnder30K_num1 = "B19001_002", 
PctHHUnder30K_num2 = "B19001_003", - PctHHUnder30K_num3 = "B19001_004", PctHHUnder30K_num4 = "B19001_005", - PctHHUnder30K_num5 = "B19001_006", PctHHUnder30K_den = "B19001_001", - PctEducLessThanHS_num = "B06009_002", PctEducLessThanHS_den = "B06009_001", - PctUnemp_num = "B23025_005", PctUnemp_den = "B23025_003") + vars <- + c( + PctMenMgmtBusScArti_num1 = 'C24030_018', + PctMenMgmtBusScArti_num2 = 'C24030_019', + PctMenMgmtBusScArti_den = 'C24030_002', + PctCrwdHH_num1 = 'B25014_005', + PctCrwdHH_num2 = 'B25014_006', + PctCrwdHH_num3 = 'B25014_007', + PctCrwdHH_num4 = 'B25014_011', + PctCrwdHH_num5 = 'B25014_012', + PctCrwdHH_num6 = 'B25014_013', + PctCrwdHH_den = 'B25014_001', + PctHHPov_num = 'B17017_002', + PctHHPov_den = 'B17017_001', + PctFemHeadKids_num1 = 'B25115_012', + PctFemHeadKids_num2 = 'B25115_025', + PctFemHeadKids_den = 'B25115_001', + PctPubAsst_num = 'B19058_002', + PctPubAsst_den = 'B19058_001', + PctHHUnder30K_num1 = 'B19001_002', + PctHHUnder30K_num2 = 'B19001_003', + PctHHUnder30K_num3 = 'B19001_004', + PctHHUnder30K_num4 = 'B19001_005', + PctHHUnder30K_num5 = 'B19001_006', + PctHHUnder30K_den = 'B19001_001', + PctEducLessThanHS_num = 'B06009_002', + PctEducLessThanHS_den = 'B06009_001', + PctUnemp_num = 'B23025_005', + PctUnemp_den = 'B23025_003' + ) if (year == 2010) { # Select census variables - vars <- c(vars[-c(26,27)], PctUnemp_den = "B23001_001", - PctUnemp_1619M = "B23001_008", PctUnemp_2021M = "B23001_015", - PctUnemp_2224M = "B23001_022", PctUnemp_2529M = "B23001_029", - PctUnemp_3034M = "B23001_036", PctUnemp_3544M = "B23001_043", - PctUnemp_4554M = "B23001_050", PctUnemp_5559M = "B23001_057", - PctUnemp_6061M = "B23001_064", PctUnemp_6264M = "B23001_071", - PctUnemp_6569M = "B23001_076", PctUnemp_7074M = "B23001_081", - PctUnemp_75upM = "B23001_086", PctUnemp_1619F = "B23001_094", - PctUnemp_2021F = "B23001_101", PctUnemp_2224F = "B23001_108", - PctUnemp_2529F = "B23001_115", PctUnemp_3034F = "B23001_122", - PctUnemp_3544F = 
"B23001_129", PctUnemp_4554F = "B23001_136", - PctUnemp_5559F = "B23001_143", PctUnemp_6061F = "B23001_150", - PctUnemp_6264F = "B23001_157", PctUnemp_6569F = "B23001_162", - PctUnemp_7074F = "B23001_167", PctUnemp_75upF = "B23001_172") + vars <- c( + vars[-c(26, 27)], + PctUnemp_den = 'B23001_001', + PctUnemp_1619M = 'B23001_008', + PctUnemp_2021M = 'B23001_015', + PctUnemp_2224M = 'B23001_022', + PctUnemp_2529M = 'B23001_029', + PctUnemp_3034M = 'B23001_036', + PctUnemp_3544M = 'B23001_043', + PctUnemp_4554M = 'B23001_050', + PctUnemp_5559M = 'B23001_057', + PctUnemp_6061M = 'B23001_064', + PctUnemp_6264M = 'B23001_071', + PctUnemp_6569M = 'B23001_076', + PctUnemp_7074M = 'B23001_081', + PctUnemp_75upM = 'B23001_086', + PctUnemp_1619F = 'B23001_094', + PctUnemp_2021F = 'B23001_101', + PctUnemp_2224F = 'B23001_108', + PctUnemp_2529F = 'B23001_115', + PctUnemp_3034F = 'B23001_122', + PctUnemp_3544F = 'B23001_129', + PctUnemp_4554F = 'B23001_136', + PctUnemp_5559F = 'B23001_143', + PctUnemp_6061F = 'B23001_150', + PctUnemp_6264F = 'B23001_157', + PctUnemp_6569F = 'B23001_162', + PctUnemp_7074F = 'B23001_167', + PctUnemp_75upF = 'B23001_172' + ) # Acquire NDI variables - ndi_vars <- suppressMessages(suppressWarnings(tidycensus::get_acs(geography = geo, - year = year, - output = "wide", - variables = vars, ...))) + ndi_data <- suppressMessages(suppressWarnings( + tidycensus::get_acs( + geography = geo, + year = year, + output = 'wide', + variables = vars, + ... 
+ ) + )) - if (geo == "tract") { - ndi_vars <- ndi_vars %>% - tidyr::separate(NAME, into = c("tract", "county", "state"), sep = ",") %>% - dplyr::mutate(tract = gsub("[^0-9\\.]","", tract)) + if (geo == 'tract') { + ndi_data <- ndi_data %>% + tidyr::separate(NAME, into = c('tract', 'county', 'state'), sep = ',') %>% + dplyr::mutate(tract = gsub('[^0-9\\.]', '', tract)) } else { - ndi_vars <- ndi_vars %>% tidyr::separate(NAME, into = c("county", "state"), sep = ",") + ndi_data <- + ndi_data %>% tidyr::separate(NAME, into = c('county', 'state'), sep = ',') } - ndi_vars <- ndi_vars %>% - dplyr::mutate(OCC = (PctMenMgmtBusScArti_num1E + PctMenMgmtBusScArti_num2E) / PctMenMgmtBusScArti_denE, - CWD = (PctCrwdHH_num1E + PctCrwdHH_num2E + PctCrwdHH_num3E + - PctCrwdHH_num4E + PctCrwdHH_num5E + PctCrwdHH_num6E) / PctCrwdHH_denE, - POV = PctHHPov_numE / PctHHPov_denE, - FHH = (PctFemHeadKids_num1E + PctFemHeadKids_num2E) / PctFemHeadKids_denE, - PUB = PctPubAsst_numE / PctPubAsst_denE, - U30 = (PctHHUnder30K_num1E + PctHHUnder30K_num2E + PctHHUnder30K_num3E + - PctHHUnder30K_num4E + PctHHUnder30K_num5E) / PctHHUnder30K_denE, - EDU = PctEducLessThanHS_numE / PctEducLessThanHS_denE, - EMP = (PctUnemp_1619ME + PctUnemp_2021ME + - PctUnemp_2224ME + PctUnemp_2529ME + - PctUnemp_4554ME + PctUnemp_5559ME + - PctUnemp_6061ME + PctUnemp_6264ME + - PctUnemp_6569ME + PctUnemp_7074ME + - PctUnemp_75upME + PctUnemp_1619FE + - PctUnemp_2021FE + PctUnemp_2224FE + - PctUnemp_2529FE + PctUnemp_4554FE + - PctUnemp_5559FE + PctUnemp_6061FE + - PctUnemp_6264FE + PctUnemp_6569FE + - PctUnemp_7074FE + PctUnemp_75upME) / PctUnemp_denE) + ndi_data <- ndi_data %>% + dplyr::mutate( + OCC = (PctMenMgmtBusScArti_num1E + PctMenMgmtBusScArti_num2E) / PctMenMgmtBusScArti_denE, + CWD = ( + PctCrwdHH_num1E + PctCrwdHH_num2E + PctCrwdHH_num3E + PctCrwdHH_num4E + + PctCrwdHH_num5E + PctCrwdHH_num6E + ) / PctCrwdHH_denE, + POV = PctHHPov_numE / PctHHPov_denE, + FHH = (PctFemHeadKids_num1E + 
PctFemHeadKids_num2E) / PctFemHeadKids_denE, + PUB = PctPubAsst_numE / PctPubAsst_denE, + U30 = ( + PctHHUnder30K_num1E + PctHHUnder30K_num2E + PctHHUnder30K_num3E + PctHHUnder30K_num4E + + PctHHUnder30K_num5E + ) / PctHHUnder30K_denE, + EDU = PctEducLessThanHS_numE / PctEducLessThanHS_denE, + EMP = ( + PctUnemp_1619ME + PctUnemp_2021ME + + PctUnemp_2224ME + PctUnemp_2529ME + + PctUnemp_4554ME + PctUnemp_5559ME + + PctUnemp_6061ME + PctUnemp_6264ME + + PctUnemp_6569ME + PctUnemp_7074ME + + PctUnemp_75upME + PctUnemp_1619FE + + PctUnemp_2021FE + PctUnemp_2224FE + + PctUnemp_2529FE + PctUnemp_4554FE + + PctUnemp_5559FE + PctUnemp_6061FE + + PctUnemp_6264FE + PctUnemp_6569FE + + PctUnemp_7074FE + PctUnemp_75upME + ) / PctUnemp_denE + ) } else { # Acquire NDI variables - ndi_vars <- suppressMessages(suppressWarnings(tidycensus::get_acs(geography = geo, - year = year, - output = "wide", - variables = vars, ...))) - - if (geo == "tract") { - ndi_vars <- ndi_vars %>% - tidyr::separate(NAME, into = c("tract", "county", "state"), sep = ",") %>% - dplyr::mutate(tract = gsub("[^0-9\\.]","", tract)) + ndi_data <- suppressMessages(suppressWarnings( + tidycensus::get_acs( + geography = geo, + year = year, + output = 'wide', + variables = vars, + ... 
+ ) + )) + + if (geo == 'tract') { + ndi_data <- ndi_data %>% + tidyr::separate(NAME, into = c('tract', 'county', 'state'), sep = ',') %>% + dplyr::mutate(tract = gsub('[^0-9\\.]', '', tract)) } else { - ndi_vars <- ndi_vars %>% tidyr::separate(NAME, into = c("county", "state"), sep = ",") + ndi_data <- + ndi_data %>% tidyr::separate(NAME, into = c('county', 'state'), sep = ',') } - ndi_vars <- ndi_vars %>% - dplyr::mutate(OCC = (PctMenMgmtBusScArti_num1E + PctMenMgmtBusScArti_num2E) / PctMenMgmtBusScArti_denE, - CWD = (PctCrwdHH_num1E + PctCrwdHH_num2E + PctCrwdHH_num3E + - PctCrwdHH_num4E + PctCrwdHH_num5E + PctCrwdHH_num6E) / PctCrwdHH_denE, - POV = PctHHPov_numE / PctHHPov_denE, - FHH = (PctFemHeadKids_num1E + PctFemHeadKids_num2E) / PctFemHeadKids_denE, - PUB = PctPubAsst_numE / PctPubAsst_denE, - U30 = (PctHHUnder30K_num1E + PctHHUnder30K_num2E + PctHHUnder30K_num3E + - PctHHUnder30K_num4E + PctHHUnder30K_num5E) / PctHHUnder30K_denE, - EDU = PctEducLessThanHS_numE / PctEducLessThanHS_denE, - EMP = PctUnemp_numE / PctUnemp_denE) + ndi_data <- ndi_data %>% + dplyr::mutate( + OCC = (PctMenMgmtBusScArti_num1E + PctMenMgmtBusScArti_num2E) / PctMenMgmtBusScArti_denE, + CWD = ( + PctCrwdHH_num1E + PctCrwdHH_num2E + PctCrwdHH_num3E + PctCrwdHH_num4E + + PctCrwdHH_num5E + PctCrwdHH_num6E + ) / PctCrwdHH_denE, + POV = PctHHPov_numE / PctHHPov_denE, + FHH = (PctFemHeadKids_num1E + PctFemHeadKids_num2E) / PctFemHeadKids_denE, + PUB = PctPubAsst_numE / PctPubAsst_denE, + U30 = ( + PctHHUnder30K_num1E + PctHHUnder30K_num2E + PctHHUnder30K_num3E + PctHHUnder30K_num4E + + PctHHUnder30K_num5E + ) / PctHHUnder30K_denE, + EDU = PctEducLessThanHS_numE / PctEducLessThanHS_denE, + EMP = PctUnemp_numE / PctUnemp_denE + ) } # Generate NDI - ndi_vars_pca <- ndi_vars %>% + ndi_data_pca <- ndi_data %>% dplyr::select(OCC, CWD, POV, FHH, PUB, U30, EDU, EMP) } else { - # If inputing pre-formatted data: - ndi_vars <- dplyr::as_tibble(df) - ndi_vars_pca <- df[ , -1] # omits the first 
feature (column) typically an ID (e.g., GEOID or FIPS) + # If inputing pre-formatted data: + ndi_data <- dplyr::as_tibble(df) + # omit the first feature (column) typically an ID (e.g., GEOID or FIPS) + ndi_data_pca <- df[,-1] } # Replace infinite values as zero (typically because denominator is zero) - ndi_vars_pca <- do.call(data.frame, - lapply(ndi_vars_pca, - function(x) replace(x, is.infinite(x), 0))) + ndi_data_pca <- do.call( + data.frame, + lapply(ndi_data_pca, function(x) replace(x, is.infinite(x), 0)) + ) # Run principal component analysis - pca <- psych::principal(ndi_vars_pca, - nfactors = 1, - n.obs = nrow(ndi_vars_pca), - covar = FALSE, - scores = TRUE, - missing = imp) + pca <- psych::principal( + ndi_data_pca, + nfactors = 1, + n.obs = nrow(ndi_data_pca), + covar = FALSE, + scores = TRUE, + missing = imp + ) # Warning for missingness of census characteristics - missingYN <- ndi_vars_pca %>% - tidyr::pivot_longer(cols = dplyr::everything(), - names_to = "variable", - values_to = "val") %>% + missingYN <- ndi_data_pca %>% + tidyr::pivot_longer( + cols = dplyr::everything(), + names_to = 'variable', + values_to = 'val' + ) %>% dplyr::group_by(variable) %>% - dplyr::summarise(total = dplyr::n(), - n_missing = sum(is.na(val)), - percent_missing = paste0(round(mean(is.na(val)) * 100, 2), " %")) + dplyr::summarise( + total = dplyr::n(), + n_missing = sum(is.na(val)), + percent_missing = paste0(round(mean(is.na(val)) * 100, 2), ' %') + ) if (quiet == FALSE) { # Warning for missing census data - if (sum(missingYN$n_missing) > 0) { - message("Warning: Missing census data") + if (sum(missingYN$n_missing) > 0) { + message('Warning: Missing census data') } # Warning for proportion of variance explained by PC1 if (pca$Vaccounted[2] < 0.50) { - message("Warning: The proportion of variance explained by PC1 is less than 0.50.") + message('Warning: The proportion of variance explained by PC1 is less than 0.50.') } } # NDI quartiles NDIQuart <- data.frame(PC1 = 
pca$scores) %>% - dplyr::mutate(NDI = PC1 / pca$value[1]^2, - NDIQuart = cut(NDI, - breaks = stats::quantile(NDI, - probs = c(0, 0.25, 0.50, 0.75, 1), - na.rm = TRUE), - labels = c("1-Least deprivation", "2-BelowAvg deprivation", - "3-AboveAvg deprivation", "4-Most deprivation"), - include.lowest = TRUE), - NDIQuart = factor(replace(as.character(NDIQuart), - is.na(NDIQuart), - "9-NDI not avail"), - c(levels(NDIQuart), "9-NDI not avail"))) %>% + dplyr::mutate( + NDI = PC1 / pca$value[1] ^ 2, + NDIQuart = cut( + NDI, + breaks = stats::quantile(NDI, probs = c(0, 0.25, 0.50, 0.75, 1), na.rm = TRUE), + labels = c( + '1-Least deprivation', + '2-BelowAvg deprivation', + '3-AboveAvg deprivation', + '4-Most deprivation' + ), + include.lowest = TRUE + ), + NDIQuart = factor( + replace(as.character(NDIQuart), is.na(NDIQuart), '9-NDI not avail'), + c(levels(NDIQuart), '9-NDI not avail') + ) + ) %>% dplyr::select(NDI, NDIQuart) if (is.null(df)) { # Format output if (round_output == TRUE) { - ndi <- cbind(ndi_vars, NDIQuart) %>% - dplyr::mutate(OCC = round(OCC, digits = 1), - CWD = round(CWD, digits = 1), - POV = round(POV, digits = 1), - FHH = round(FHH, digits = 1), - PUB = round(PUB, digits = 1), - U30 = round(U30, digits = 1), - EDU = round(EDU, digits = 1), - EMP = round(EMP, digits = 1), - NDI = round(NDI, digits = 4)) + ndi <- cbind(ndi_data, NDIQuart) %>% + dplyr::mutate( + OCC = round(OCC, digits = 1), + CWD = round(CWD, digits = 1), + POV = round(POV, digits = 1), + FHH = round(FHH, digits = 1), + PUB = round(PUB, digits = 1), + U30 = round(U30, digits = 1), + EDU = round(EDU, digits = 1), + EMP = round(EMP, digits = 1), + NDI = round(NDI, digits = 4) + ) } else { - ndi <- cbind(ndi_vars, NDIQuart) + ndi <- cbind(ndi_data, NDIQuart) } - if (geo == "tract") { + if (geo == 'tract') { ndi <- ndi %>% - dplyr::select(GEOID, - state, - county, - tract, - NDI, NDIQuart, - OCC, CWD, POV, FHH, PUB, U30, EDU, EMP) + dplyr::select( + GEOID, + state, + county, + tract, + NDI, + 
NDIQuart, + OCC, + CWD, + POV, + FHH, + PUB, + U30, + EDU, + EMP + ) } else { ndi <- ndi %>% - dplyr::select(GEOID, - state, - county, - NDI, NDIQuart, - OCC, CWD, POV, FHH, PUB, U30, EDU, EMP) + dplyr::select( + GEOID, + state, + county, + NDI, + NDIQuart, + OCC, + CWD, + POV, + FHH, + PUB, + U30, + EDU, + EMP + ) } ndi <- ndi %>% - dplyr::mutate(state = stringr::str_trim(state), - county = stringr::str_trim(county)) %>% + dplyr::mutate( + state = stringr::str_trim(state), + county = stringr::str_trim(county) + ) %>% dplyr::arrange(GEOID) %>% dplyr::as_tibble() } else { - ndi <- cbind(df[ , 1], NDIQuart, df[ , 2:ncol(df)]) - ndi <- dplyr::as_tibble(ndi[order(ndi[ , 1]), ]) + ndi <- cbind(df[, 1], NDIQuart, df[, 2:ncol(df)]) + ndi <- dplyr::as_tibble(ndi[order(ndi[, 1]),]) } - out <- list(ndi = ndi, - pca = pca, - missing = missingYN) + out <- list(ndi = ndi, pca = pca, missing = missingYN) return(out) } diff --git a/R/ndi-package.R b/R/ndi-package.R new file mode 100644 index 0000000..29cb891 --- /dev/null +++ b/R/ndi-package.R @@ -0,0 +1,70 @@ +#' The ndi Package: Neighborhood Deprivation Indices +#' +#' Computes various metrics of socio-economic deprivation and disparity in the United States based on information available from the U.S. Census Bureau. +#' +#' @details The 'ndi' package computes various metrics of socio-economic deprivation and disparity in the United States. Some metrics are considered "spatial" because they consider the values of neighboring (i.e., adjacent) census geographies in their computation, while other metrics are "aspatial" because they only consider the value within each census geography. Two types of aspatial neighborhood deprivation indices (\emph{NDI}) are available: (1) based on Messer et al. (2006) \doi{10.1007/s11524-006-9094-x} and (2) based on Andrews et al. (2020) \doi{10.1080/17445647.2020.1750066} and Slotman et al. 
(2022) \doi{10.1016/j.dib.2022.108002} who use variables chosen by Roux and Mair (2010) \doi{10.1111/j.1749-6632.2009.05333.x}. Both are a decomposition of multiple demographic characteristics from the U.S. Census Bureau American Community Survey 5-year estimates (ACS-5; 2006-2010 onward). Using data from the ACS-5 (2005-2009 onward), the package can also compute the (1) spatial Racial Isolation Index (\emph{RI}) based on Anthopolos et al. (2011) \doi{10.1016/j.sste.2011.06.002}, (2) spatial Educational Isolation Index (\emph{EI}) based on Bravo et al. (2021) \doi{10.3390/ijerph18179384}, (3) aspatial Index of Concentration at the Extremes (\emph{ICE}) based on Feldman et al. (2015) \doi{10.1136/jech-2015-205728} and Krieger et al. (2016) \doi{10.2105/AJPH.2015.302955}, (4) aspatial racial/ethnic Dissimilarity Index (\emph{DI}) based on Duncan & Duncan (1955) \doi{10.2307/2088328}, (5) aspatial income or racial/ethnic Atkinson Index (\emph{AI}) based on Atkinson (1970) \doi{10.1016/0022-0531(70)90039-6}, (6) aspatial racial/ethnic Isolation Index (\emph{II}) based on Shevky & Williams (1949; ISBN-13:978-0-837-15637-8) and Bell (1954) \doi{10.2307/2574118}, (7) aspatial racial/ethnic Correlation Ratio (\emph{V}) based on Bell (1954) \doi{10.2307/2574118} and White (1986) \doi{10.2307/3644339}, (8) aspatial racial/ethnic Location Quotient (\emph{LQ}) based on Merton (1939) \doi{10.2307/2084686} and Sudano et al. (2013) \doi{10.1016/j.healthplace.2012.09.015}, (9) aspatial racial/ethnic Local Exposure and Isolation (\emph{LEx/Is}) metric based on Bemanian & Beyer (2017) \doi{10.1158/1055-9965.EPI-16-0926}, (10) aspatial racial/ethnic Delta (\emph{DEL}) based on Hoover (1941) \doi{10.1017/S0022050700052980} and Duncan et al. (1961; LC:60007089), and (11) an index of spatial proximity (\emph{SP}) based on White (1986) \doi{10.2307/3644339} and Blau (1977; ISBN-13:978-0-029-03660-0). 
Also using data from the ACS-5 (2005-2009 onward), the package can retrieve the aspatial Gini Index (\emph{G}) based on Gini (1921) \doi{10.2307/2223319}. +#' +#' Key content of the 'ndi' package includes:\cr +#' +#' \bold{Metrics of Socio-Economic Deprivation and Disparity} +#' +#' \code{\link{anthopolos}} Computes the spatial Racial Isolation Index (\emph{RI}) based on Anthopolos et al. (2011) \doi{10.1016/j.sste.2011.06.002}. +#' +#' \code{\link{atkinson}} Computes the aspatial income or racial/ethnic Atkinson Index (\emph{A}) based on Atkinson (1970) \doi{10.1016/0022-0531(70)90039-6}. +#' +#' \code{\link{bell}} Computes the aspatial racial/ethnic Interaction Index (\emph{xPy\*}) based on Shevky & Williams (1949; ISBN-13:978-0-837-15637-8) and Bell (1954) \doi{10.2307/2574118}. +#' +#' \code{\link{bemanian_beyer}} Computes the aspatial racial/ethnic Local Exposure and Isolation (\emph{LEx/Is}) metric based on Bemanian & Beyer (2017) \doi{10.1158/1055-9965.EPI-16-0926}. +#' +#' \code{\link{bravo}} Computes the spatial Educational Isolation Index (\emph{EI}) based on Bravo et al. (2021) \doi{10.3390/ijerph18179384}. +#' +#' \code{\link{duncan}} Computes the aspatial racial/ethnic Dissimilarity Index (\emph{D}) based on Duncan & Duncan (1955) \doi{10.2307/2088328}. +#' +#' \code{\link{gini}} Retrieves the aspatial Gini Index (\emph{G}) based on Gini (1921) \doi{10.2307/2223319}. +#' +#' \code{\link{hoover}} Computes the aspatial racial/ethnic Delta (\emph{DEL}) based on Hoover (1941) \doi{10.1017/S0022050700052980} and Duncan et al. (1961; LC:60007089). +#' +#' \code{\link{krieger}} Computes the aspatial Index of Concentration at the Extremes (\emph{ICE}) based on Feldman et al. (2015) \doi{10.1136/jech-2015-205728} and Krieger et al. (2016) \doi{10.2105/AJPH.2015.302955}. +#' +#' \code{\link{lieberson}} Computes the aspatial racial/ethnic Isolation Index (\emph{xPx\*}) based on Lieberson (1981; ISBN-13:978-1-032-53884-6) and Bell (1954) \doi{10.2307/2574118}. 
+#' +#' \code{\link{messer}} Computes the aspatial Neighborhood Deprivation Index (\emph{NDI}) based on Messer et al. (2006) \doi{10.1007/s11524-006-9094-x}. +#' +#' \code{\link{powell_wiley}} Computes the aspatial Neighborhood Deprivation Index (\emph{NDI}) based on Andrews et al. (2020) \doi{10.1080/17445647.2020.1750066} and Slotman et al. (2022) \doi{10.1016/j.dib.2022.108002} who use variables chosen by Roux and Mair (2010) \doi{10.1111/j.1749-6632.2009.05333.x}. +#' +#' \code{\link{sudano}} Computes the aspatial racial/ethnic Location Quotient (\emph{LQ}) based on Merton (1939) \doi{10.2307/2084686} and Sudano et al. (2013) \doi{10.1016/j.healthplace.2012.09.015}. +#' +#' \code{\link{white}} Computes the aspatial racial/ethnic Correlation Ratio (\emph{V}) based on Bell (1954) \doi{10.2307/2574118} and White (1986) \doi{10.2307/3644339}. +#' +#' \code{\link{white_blau}} Computes an index of spatial proximity (\emph{SP}) based on White (1986) \doi{10.2307/3644339} and Blau (1977; ISBN-13:978-0-029-03660-0). +#' +#' \bold{Pre-formatted U.S. Census Data} +#' +#' \code{\link{DCtracts2020}} A sample dataset containing information about U.S. Census American Community Survey 5-year estimate data for the District of Columbia census tracts (2020). The data are obtained from the \code{\link[tidycensus]{get_acs}} function and formatted for the \code{\link{messer}} and \code{\link{powell_wiley}} functions input. +#' +#' @name ndi-package +#' @aliases ndi-package ndi +#' +#' @section Dependencies: The 'ndi' package relies heavily upon \code{\link{tidycensus}} to retrieve data from the U.S. Census Bureau American Community Survey five-year estimates and the \code{\link{psych}} for computing the neighborhood deprivation indices. The \code{\link{messer}} function builds upon code developed by Hruska et al. 
(2022) \doi{10.17605/OSF.IO/M2SAV} by functionalizing, adding the percent of households earning <$30,000 per year to the NDI computation, and providing the option for computing the ACS-5 2006-2010 NDI values. There is no code companion to compute NDI included in Andrews et al. (2020) \doi{10.1080/17445647.2020.1750066} or Slotman et al. (2022) \doi{10.1016/j.dib.2022.108002}, but the package author worked directly with the Slotman et al. (2022) \doi{10.1016/j.dib.2022.108002} authors to replicate their SAS code in R. The spatial metrics RI and EI rely on the \code{\link{sf}} and \code{\link{Matrix}} packages to compute the geospatial adjacency matrix between census geographies. Internal function to calculate AI is based on \code{\link[DescTools]{Atkinson}} function. There is no code companion to compute RI, EI, DI, II, V, LQ, or LEx/Is included in Anthopolos et al. (2011) \doi{10.1016/j.sste.2011.06.002}, Bravo et al. (2021) \doi{10.3390/ijerph18179384}, Duncan & Duncan (1955) \doi{10.2307/2088328}, Bell (1954) \doi{10.2307/2574118}, White (1986) \doi{10.2307/3644339}, Sudano et al. (2013) \doi{10.1016/j.healthplace.2012.09.015}, or Bemanian & Beyer (2017) \doi{10.1158/1055-9965.EPI-16-0926}, respectively. +#' +#' @author Ian D. Buller\cr \emph{DLH Corporation (formerly Social & Scientific Systems, Inc.), Bethesda, Maryland, USA (current); Occupational and Environmental Epidemiology Branch, Division of Cancer Epidemiology and Genetics, National Cancer Institute, National Institutes of Health, Rockville, Maryland, USA (original).} \cr +#' +#' Maintainer: I.D.B.
\email{ian.buller@@alumni.emory.edu} +#' +#' @keywords internal +'_PACKAGE' + +#' @import dplyr +#' @importFrom car logit +#' @importFrom MASS ginv +#' @importFrom Matrix sparseMatrix +#' @importFrom psych alpha principal +#' @importFrom sf st_drop_geometry st_geometry st_intersects st_within +#' @importFrom stats complete.cases cor cov2cor loadings median na.omit promax quantile sd setNames +#' @importFrom stringr str_trim +#' @importFrom tidycensus get_acs +#' @importFrom tidyr pivot_longer separate +#' @importFrom tigris combined_statistical_areas core_based_statistical_areas metro_divisions +#' @importFrom units drop_units set_units +#' @importFrom utils stack +NULL diff --git a/R/package.R b/R/package.R deleted file mode 100644 index 4f17aa9..0000000 --- a/R/package.R +++ /dev/null @@ -1,54 +0,0 @@ -#' The ndi Package: Neighborhood Deprivation Indices -#' -#' Computes various metrics of socio-economic deprivation and disparity in the United States based on information available from the U.S. Census Bureau. -#' -#' @details The 'ndi' package computes various metrics of socio-economic deprivation and disparity in the United States. Some metrics are considered "spatial" because they consider the values of neighboring (i.e., adjacent) census geographies in their computation, while other metrics are "aspatial" because they only consider the value within each census geography. Two types of aspatial neighborhood deprivation indices (NDI) are available: (1) based on Messer et al. (2006) \doi{10.1007/s11524-006-9094-x} and (2) based on Andrews et al. (2020) \doi{10.1080/17445647.2020.1750066} and Slotman et al. (2022) \doi{10.1016/j.dib.2022.108002} who use variables chosen by Roux and Mair (2010) \doi{10.1111/j.1749-6632.2009.05333.x}. Both are a decomposition of multiple demographic characteristics from the U.S. Census Bureau American Community Survey 5-year estimates (ACS-5; 2006-2010 onward). 
Using data from the ACS-5 (2005-2009 onward), the package can also (1) compute the spatial Racial Isolation Index (RI) based on Anthopolos et al. (2011) \doi{10.1016/j.sste.2011.06.002}, (2) compute the spatial Educational Isolation Index (EI) based on Bravo et al. (2021) \doi{10.3390/ijerph18179384}, (3) compute the aspatial Index of Concentration at the Extremes (ICE) based on Feldman et al. (2015) \doi{10.1136/jech-2015-205728} and Krieger et al. (2016) \doi{10.2105/AJPH.2015.302955}, (4) compute the aspatial Atkinson Index based on Atkinson (1970) \doi{10.1016/0022-0531(70)90039-6}, (5) compute the aspatial Dissimilarity Index based on Duncan & Duncan (1955) \doi{10.2307/2088328}, and (6) retrieve the aspatial Gini Index based on Gini (1921) \doi{10.2307/2223319}. -#' -#' Key content of the 'ndi' package include:\cr -#' -#' \bold{Metrics of Socio-Economic Deprivation and Disparity} -#' -#' \code{\link{anthopolos}} Computes the spatial Racial Isolation Index (RI) based on Anthopolos (2011) \doi{10.1016/j.sste.2011.06.002}. -#' -#' \code{\link{atkinson}} Computes the aspatial Atkinson Index (AI) based on Atkinson (1970) \doi{10.1016/0022-0531(70)90039-6}. -#' -#' \code{\link{bravo}} Computes the spatial Educational Isolation Index (EI) based on Bravo (2021) \doi{10.3390/ijerph18179384}. -#' -#' \code{\link{duncan}} Computes the aspatial racial/ethnic Dissimilarity Index based on Duncan & Duncan (1955) \doi{10.2307/2088328}. -#' -#' \code{\link{gini}} Retrieves the aspatial Gini Index based on Gini (1921) \doi{10.2307/2223319}. -#' -#' \code{\link{krieger}} Computes the aspatial Index of Concentration at the Extremes based on Feldman et al. (2015) \doi{10.1136/jech-2015-205728} and Krieger et al. (2016) \doi{10.2105/AJPH.2015.302955}. -#' -#' \code{\link{messer}} Computes the aspatial Neighborhood Deprivation Index (NDI) based on Messer et al. (2006) \doi{10.1007/s11524-006-9094-x}. 
-#' -#' \code{\link{powell_wiley}} Computes the aspatial Neighborhood Deprivation Index (NDI) based on Andrews et al. (2020) \doi{10.1080/17445647.2020.1750066} and Slotman et al. (2022) \doi{10.1016/j.dib.2022.108002} who use variables chosen by Roux and Mair (2010) \doi{10.1111/j.1749-6632.2009.05333.x}. -#' -#' \bold{Pre-formatted U.S. Census Data} -#' -#' \code{\link{DCtracts2020}} A sample dataset containing information about U.S. Census American Community Survey 5-year estimate data for the District of Columbia census tracts (2020). The data are obtained from the \code{\link[tidycensus]{get_acs}} function and formatted for the \code{\link{messer}} and \code{\link{powell_wiley}} functions input. -#' -#' @name ndi-package -#' @aliases ndi-package ndi -#' @docType package -#' -#' @section Dependencies: The 'ndi' package relies heavily upon \code{\link{tidycensus}} to retrieve data from the U.S. Census Bureau American Community Survey five-year estimates and the \code{\link{psych}} for computing the neighborhood deprivation indices. The \code{\link{messer}} function builds upon code developed by Hruska et al. (2022) \doi{10.17605/OSF.IO/M2SAV} by fictionalizing, adding the percent of households earning <$30,000 per year to the NDI computation, and providing the option for computing the ACS-5 2006-2010 NDI values. There is no code companion to compute NDI included in Andrews et al. (2020) \doi{10.1080/17445647.2020.1750066} or Slotman et al. (2022) \doi{10.1016/j.dib.2022.108002}, but the package author worked directly with the Slotman et al. (2022) \doi{10.1016/j.dib.2022.108002} authors to replicate their SAS code in R. The spatial metrics RI and EI rely on the \code{\link{sf}} and \code{\link{Matrix}} packages to compute the geospatial adjacency matrix between census geographies. Internal function to calculate AI is based on \code{\link[DescTools]{Atkinson}} function. There is no code companion to compute RI, EI, or DI included in Anthopolos et al. 
(2011) \doi{10.1016/j.sste.2011.06.002}, Bravo et al. (2021) \doi{10.3390/ijerph18179384}, or Duncan & Duncan (1955) \doi{10.2307/2088328}, respectively. -#' -#' @author Ian D. Buller\cr \emph{Social & Scientific Systems, Inc., a division of DLH Corporation, Silver Spring, Maryland, USA (current); Occupational and Environmental Epidemiology Branch, Division of Cancer Epidemiology and Genetics, National Cancer Institute, National Institutes of Health, Rockville, Maryland, USA (original).} \cr -#' -#' Maintainer: I.D.B. \email{ian.buller@@alumni.emory.edu} -#' -#' @keywords package -NULL - -#' @import dplyr -#' @importFrom MASS ginv -#' @importFrom Matrix sparseMatrix -#' @importFrom psych alpha principal -#' @importFrom sf st_drop_geometry st_geometry st_intersects -#' @importFrom stats complete.cases cor cov2cor loadings median na.omit promax quantile sd setNames -#' @importFrom stringr str_trim -#' @importFrom tidycensus get_acs -#' @importFrom tidyr pivot_longer separate -#' @importFrom utils stack -NULL diff --git a/R/powell_wiley.R b/R/powell_wiley.R index e524419..8708289 100644 --- a/R/powell_wiley.R +++ b/R/powell_wiley.R @@ -1,52 +1,52 @@ #' Neighborhood Deprivation Index based on Andrews et al. (2020) and Slotman et al. (2022) -#' +#' #' Compute the aspatial Neighborhood Deprivation Index (Powell-Wiley). #' -#' @param geo Character string specifying the geography of the data either census tracts \code{geo = "tract"} (the default) or counties \code{geo = "county"}. +#' @param geo Character string specifying the geography of the data either census tracts \code{geo = 'tract'} (the default) or counties \code{geo = 'county'}. #' @param year Numeric. The year to compute the estimate. The default is 2020, and the years 2010 onward are currently available. -#' @param imp Logical. If TRUE, will impute missing census characteristics within the internal \code{\link[psych]{principal}} using median values of variables. If FALSE (the default), will not impute. 
+#' @param imp Logical. If TRUE, will impute missing census characteristics within the internal \code{\link[psych]{principal}} using median values of variables. If FALSE (the default), will not impute. #' @param quiet Logical. If TRUE, will display messages about potential missing census information, standardized Cronbach's alpha, and proportion of variance explained by principal component analysis. The default is FALSE. -#' @param round_output Logical. If TRUE, will round the output of raw census and NDI values from the \code{\link[tidycensus]{get_acs}} at one and four significant digits, respectively. The default is FALSE. +#' @param round_output Logical. If TRUE, will round the output of raw census and \emph{NDI} values from the \code{\link[tidycensus]{get_acs}} at one and four significant digits, respectively. The default is FALSE. #' @param df Optional. Pass a pre-formatted \code{'dataframe'} or \code{'tibble'} with the desired variables through the function. Bypasses the data obtained by \code{\link[tidycensus]{get_acs}}. The default is NULL. See Details below. #' @param ... Arguments passed to \code{\link[tidycensus]{get_acs}} to select state, county, and other arguments for census characteristics #' -#' @details This function will compute the aspatial Neighborhood Deprivation Index (NDI) of U.S. census tracts or counties for a specified geographical referent (e.g., US-standardized) based on Andrews et al. (2020) \doi{10.1080/17445647.2020.1750066} and Slotman et al. (2022) \doi{10.1016/j.dib.2022.108002}. -#' +#' @details This function will compute the aspatial Neighborhood Deprivation Index (\emph{NDI}) of U.S. census tracts or counties for a specified geographical referent (e.g., US-standardized) based on Andrews et al. (2020) \doi{10.1080/17445647.2020.1750066} and Slotman et al. (2022) \doi{10.1016/j.dib.2022.108002}. +#' #' The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. 
Census Bureau 5-year American Community Survey characteristics used for computation involving a factor analysis with the \code{\link[psych]{principal}} function. The yearly estimates are available in 2010 and after when all census characteristics became available. The thirteen characteristics chosen by Roux and Mair (2010) \doi{10.1111/j.1749-6632.2009.05333.x} are: #' \itemize{ -#' \item{MedHHInc (5B19013): }{median household income (dollars)} -#' \item{PctRecvIDR (B19054): }{percent of households receiving dividends, interest, or rental income} -#' \item{PctPubAsst (B19058): }{percent of households receiving public assistance} -#' \item{MedHomeVal (B25077): }{median home value (dollars)} -#' \item{PctMgmtBusScArti (C24060): }{percent in a management, business, science, or arts occupation} -#' \item{PctFemHeadKids (B11005): }{percent of households that are female headed with any children under 18 years} -#' \item{PctOwnerOcc (DP04): }{percent of housing units that are owner occupied} -#' \item{PctNoPhone (DP04): }{percent of households without a telephone} -#' \item{PctNComPlm (DP04): }{percent of households without complete plumbing facilities} -#' \item{PctEducHSPlus (S1501): }{percent with a high school degree or higher (population 25 years and over)} -#' \item{PctEducBchPlus (S1501): }{percent with a college degree or higher (population 25 years and over)} -#' \item{PctFamBelowPov (S1702): }{percent of families with incomes below the poverty level} -#' \item{PctUnempl (S2301): }{percent unemployed} +#' \item \strong{MedHHInc (B19013)}: median household income (dollars) +#' \item \strong{PctRecvIDR (B19054)}: percent of households receiving dividends, interest, or rental income +#' \item \strong{PctPubAsst (B19058)}: percent of households receiving public assistance +#' \item \strong{MedHomeVal (B25077)}: median home value (dollars) +#' \item \strong{PctMgmtBusScArti (C24060)}: percent in a management, business, science, or arts occupation +#' \item 
\strong{PctFemHeadKids (B11005)}: percent of households that are female headed with any children under 18 years +#' \item \strong{PctOwnerOcc (DP04)}: percent of housing units that are owner occupied +#' \item \strong{PctNoPhone (DP04)}: percent of households without a telephone +#' \item \strong{PctNComPlmb (DP04)}: percent of households without complete plumbing facilities +#' \item \strong{PctEducHSPlus (S1501)}: percent with a high school degree or higher (population 25 years and over) +#' \item \strong{PctEducBchPlus (S1501)}: percent with a college degree or higher (population 25 years and over) +#' \item \strong{PctFamBelowPov (S1702)}: percent of families with incomes below the poverty level +#' \item \strong{PctUnempl (S2301)}: percent unemployed #' } -#' -#' Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify the referent for standardizing the NDI (Powell-Wiley) values. For example, if all U.S. states are specified for the \code{state} argument, then the output would be a U.S.-standardized index. Please note: the NDI (Powell-Wiley) values will not exactly match (but will highly correlate with) those found in Andrews et al. (2020) \doi{10.1080/17445647.2020.1750066} and Slotman et al. (2022) \doi{10.1016/j.dib.2022.108002} because the two studies used a different statistical platform (i.e., SPSS and SAS, respectively) that intrinsically calculate the principal component analysis differently from R. -#' -#' The categorical NDI (Powell-Wiley) values are population-weighted quintiles of the continuous NDI (Powell-Wiley) values. -#' +#' +#' Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify the referent for standardizing the \emph{NDI} (Powell-Wiley) values. For example, if all U.S. states are specified for the \code{state} argument, then the output would be a U.S.-standardized index.
Please note: the \emph{NDI} (Powell-Wiley) values will not exactly match (but will highly correlate with) those found in Andrews et al. (2020) \doi{10.1080/17445647.2020.1750066} and Slotman et al. (2022) \doi{10.1016/j.dib.2022.108002} because the two studies used a different statistical platform (i.e., SPSS and SAS, respectively) that intrinsically calculate the principal component analysis differently from R. +#' +#' The categorical \emph{NDI} (Powell-Wiley) values are population-weighted quintiles of the continuous \emph{NDI} (Powell-Wiley) values. +#' #' Check if the proportion of variance explained by the first principal component is high (more than 0.5). -#' +#' #' Users can bypass \code{\link[tidycensus]{get_acs}} by specifying a pre-formatted data frame or tibble using the \code{df} argument. This function will compute an index using the first component of a principal component analysis (PCA) with a Promax (oblique) rotation and a minimum Eigenvalue of 1, omitting variables with absolute loading score < 0.4. The recommended structure of the data frame or tibble is an ID (e.g., GEOID) in the first feature (column), an estimate of the total population in the second feature (column), followed by the variables of interest (in any order) and no additional information (e.g., omit state or county names from the \code{df} argument input). -#' +#' #' @return An object of class 'list'. 
This is a named list with the following components: -#' +#' #' \describe{ -#' \item{\code{ndi}}{An object of class 'tbl' for the GEOID, name, NDI continuous, NDI quintiles, and raw census values of specified census geographies.} -#' \item{\code{pca}}{An object of class 'principal', returns the output of \code{\link[psych]{principal}} used to compute the NDI values.} -#' \item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute NDI.} +#' \item{\code{ndi}}{An object of class 'tbl' for the GEOID, name, \emph{NDI} continuous, \emph{NDI} quintiles, and raw census values of specified census geographies.} +#' \item{\code{pca}}{An object of class 'principal', returns the output of \code{\link[psych]{principal}} used to compute the \emph{NDI} values.} +#' \item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute \emph{NDI}.} #' \item{\code{cronbach}}{An object of class 'character' or 'numeric' for the results of the Cronbach's alpha calculation. If only one factor is computed, a message is returned. If more than one factor is computed, Cronbach's alpha is calculated and should check that it is >0.7 for respectable internal consistency between factors.} #' } -#' -#' @import dplyr +#' +#' @import dplyr #' @importFrom MASS ginv #' @importFrom psych alpha principal #' @importFrom stats complete.cases cor cov2cor loadings median promax quantile sd @@ -54,310 +54,414 @@ #' @importFrom tidycensus get_acs #' @importFrom tidyr pivot_longer separate #' @export -#' +#' #' @seealso \code{\link[tidycensus]{get_acs}} for additional arguments for geographic referent selection (i.e., \code{state} and \code{county}). #' #' @examples -#' +#' #' powell_wiley(df = DCtracts2020[ , -c(3:10)]) -#' +#' #' \dontrun{ #' # Wrapped in \dontrun{} because these examples require a Census API key. 
-#' -#' # Tract-level metric (2020) -#' powell_wiley(geo = "tract", state = "GA", year = 2020) #' -#' # Impute NDI for tracts (2020) with missing census information (median values) -#' powell_wiley(state = "tract", "GA", year = 2020, imp = TRUE) -#' -#' # County-level metric (2020) -#' powell_wiley(geo = "county", state = "GA", year = 2020) -#' +#' # Tract-level NDI (Powell-Wiley; 2020) +#' powell_wiley(geo = 'tract', state = 'GA', year = 2020) +#' +#' # Impute NDI (Powell-Wiley; 2020) for tracts with missing census information (median values) +#' powell_wiley(geo = 'tract', state = 'GA', year = 2020, imp = TRUE) +#' #' } -#' -powell_wiley <- function(geo = "tract", year = 2020, imp = FALSE, quiet = FALSE, round_output = FALSE, df = NULL, ...) { - - # Check arguments - if (!is.null(df) & !inherits(df, c("tbl_df", "tbl", "data.frame"))) { stop("'df' must be class 'data.frame' or 'tbl'") } +#' +powell_wiley <- function(geo = 'tract', + year = 2020, + imp = FALSE, + quiet = FALSE, + round_output = FALSE, + df = NULL, + ...)
{ - if (is.null(df)) { - - # Check additional arguments - match.arg(geo, choices = c("county", "tract")) - stopifnot(is.numeric(year), year >= 2010) # all variables available 2010 onward - - # Select census variables - vars <- c(MedHHInc = "B19013_001", - PctRecvIDR_num = "B19054_002", PctRecvIDR_den = "B19054_001", - PctPubAsst_num = "B19058_002", PctPubAsst_den = "B19058_001", - MedHomeVal = "B25077_001", - PctMgmtBusScArti_num = "C24060_002", PctMgmtBusScArti_den = "C24060_001", - PctFemHeadKids_num1 = "B11005_007", PctFemHeadKids_num2 = "B11005_010", - PctFemHeadKids_den = "B11005_001", - PctOwnerOcc = "DP04_0046P", - PctNoPhone = "DP04_0075P", - PctNComPlmb = "DP04_0073P", - PctEduc_num25upHS = "S1501_C01_009", - PctEduc_num25upSC = "S1501_C01_010", - PctEduc_num25upAD = "S1501_C01_011", - PctEduc_num25upBD = "S1501_C01_012", - PctEduc_num25upGD = "S1501_C01_013", - PctEduc_den25up = "S1501_C01_006", - PctFamBelowPov = "S1702_C02_001", - PctUnempl = "S2301_C04_001", - TotalPopulation = "B01001_001") - - # Updated census variable definition(s) - if (year < 2015){ vars <- c(vars[-13], PctNoPhone = "DP04_0074P") } + # Check arguments + if (!is.null(df) & + !inherits(df, c('tbl_df', 'tbl', 'data.frame'))) { + stop("'df' must be class 'data.frame' or 'tbl'") + } - # Acquire NDI variables - ndi_vars <- suppressMessages(suppressWarnings(tidycensus::get_acs(geography = geo, - year = year, - output = "wide", - variables = vars, ...))) + if (is.null(df)) { + # Check additional arguments + match.arg(geo, choices = c('county', 'tract')) + stopifnot(is.numeric(year), year >= 2010) # all variables available 2010 onward + + # Select census variables + vars <- c( + MedHHInc = 'B19013_001', + PctRecvIDR_num = 'B19054_002', + PctRecvIDR_den = 'B19054_001', + PctPubAsst_num = 'B19058_002', + PctPubAsst_den = 'B19058_001', + MedHomeVal = 'B25077_001', + PctMgmtBusScArti_num = 'C24060_002', + PctMgmtBusScArti_den = 'C24060_001', + PctFemHeadKids_num1 = 'B11005_007', + 
PctFemHeadKids_num2 = 'B11005_010', + PctFemHeadKids_den = 'B11005_001', + PctOwnerOcc = 'DP04_0046P', + PctNoPhone = 'DP04_0075P', + PctNComPlmb = 'DP04_0073P', + PctEduc_num25upHS = 'S1501_C01_009', + PctEduc_num25upSC = 'S1501_C01_010', + PctEduc_num25upAD = 'S1501_C01_011', + PctEduc_num25upBD = 'S1501_C01_012', + PctEduc_num25upGD = 'S1501_C01_013', + PctEduc_den25up = 'S1501_C01_006', + PctFamBelowPov = 'S1702_C02_001', + PctUnempl = 'S2301_C04_001', + TotalPopulation = 'B01001_001' + ) + + # Updated census variable definition(s) + if (year < 2015) { + vars <- c(vars[-13], PctNoPhone = 'DP04_0074P') + } + + # Acquire NDI variables + ndi_data <- suppressMessages(suppressWarnings( + tidycensus::get_acs( + geography = geo, + year = year, + output = 'wide', + variables = vars, + ... + ) + )) + + + if (geo == 'tract') { + ndi_data <- ndi_data %>% + tidyr::separate(NAME, into = c('tract', 'county', 'state'), sep = ',') %>% + dplyr::mutate(tract = gsub('[^0-9\\.]', '', tract)) + } else { + ndi_data <- ndi_data %>% + tidyr::separate(NAME, into = c('county', 'state'), sep = ',') + } + + ndi_data <- ndi_data %>% + dplyr::mutate( + MedHHInc = MedHHIncE, + PctRecvIDR = PctRecvIDR_numE / PctRecvIDR_denE * 100, + PctPubAsst = PctPubAsst_numE / PctPubAsst_denE * 100, + MedHomeVal = MedHomeValE, + PctMgmtBusScArti = PctMgmtBusScArti_numE / PctMgmtBusScArti_denE * 100, + PctFemHeadKids = (PctFemHeadKids_num1E + PctFemHeadKids_num2E) / + PctFemHeadKids_denE * 100, + PctOwnerOcc = PctOwnerOccE, + PctNoPhone = PctNoPhoneE, + PctNComPlmb = PctNComPlmbE, + PctEducHSPlus = ( + PctEduc_num25upHSE + PctEduc_num25upSCE + PctEduc_num25upADE + PctEduc_num25upBDE + + PctEduc_num25upGDE + ) / PctEduc_den25upE * 100, + PctEducBchPlus = (PctEduc_num25upBDE + PctEduc_num25upGDE) / PctEduc_den25upE * 100, + PctFamBelowPov = PctFamBelowPovE, + PctUnempl = PctUnemplE, + TotalPop = TotalPopulationE + ) %>% + # Log transform median household income and median home value + # Reverse code 
percentages so that higher values represent more deprivation + # Round percentages to 1 decimal place + dplyr::mutate( + logMedHHInc = log(MedHHInc), + logMedHomeVal = log(MedHomeVal), + PctNoIDR = 100 - PctRecvIDR, + PctWorkClass = 100 - PctMgmtBusScArti, + PctNotOwnerOcc = 100 - PctOwnerOcc, + PctEducLTHS = 100 - PctEducHSPlus, + PctEducLTBch = 100 - PctEducBchPlus + ) %>% + # Z-standardize the percentages + dplyr::mutate( + PctNoIDRZ = scale(PctNoIDR), + PctPubAsstZ = scale(PctPubAsst), + PctWorkClassZ = scale(PctWorkClass), + PctFemHeadKidsZ = scale(PctFemHeadKids), + PctNotOwnerOccZ = scale(PctNotOwnerOcc), + PctNoPhoneZ = scale(PctNoPhone), + PctNComPlmbZ = scale(PctNComPlmb), + PctEducLTHSZ = scale(PctEducLTHS), + PctEducLTBchZ = scale(PctEducLTBch), + PctFamBelowPovZ = scale(PctFamBelowPov), + PctUnemplZ = scale(PctUnempl) + ) + + # generate NDI + ndi_data_pca <- ndi_data %>% + dplyr::select( + logMedHHInc, + PctNoIDRZ, + PctPubAsstZ, + logMedHomeVal, + PctWorkClassZ, + PctFemHeadKidsZ, + PctNotOwnerOccZ, + PctNoPhoneZ, + PctNComPlmbZ, + PctEducLTHSZ, + PctEducLTBchZ, + PctFamBelowPovZ, + PctUnemplZ + ) + } else { + # If inputing pre-formatted data: + ## rename first and second features (columns) with name to match above + colnames(df)[1:2] <- c('GEOID', 'TotalPop') + ndi_data <- dplyr::as_tibble(df) + ## omit the first two features (columns) typically an ID (e.g., GEOID or FIPS) and TotalPop + ndi_data_pca <- ndi_data[,-c(1:2)] + } + # Run a factor analysis using Promax (oblique) rotation and a minimum Eigenvalue of 1 + nfa <- eigen(stats::cor(ndi_data_pca, use = 'complete.obs')) + nfa <- sum(nfa$values > 1) # count of factors with a minimum Eigenvalue of 1 + fit <- psych::principal(ndi_data_pca, nfactors = nfa, rotate = 'none') + fit_rotate <- stats::promax(stats::loadings(fit), m = 3) - if (geo == "tract") { - ndi_vars <- ndi_vars %>% - tidyr::separate(NAME, into = c("tract", "county", "state"), sep = ",") %>% - dplyr::mutate(tract = gsub("[^0-9\\.]","", 
tract)) + # Calculate the factors using only variables with an absolute loading score > 0.4 for the first factor + ## If number of factors > 2, use structure matrix, else pattern matrix + if (nfa > 1) { + P_mat <- matrix(stats::loadings(fit_rotate), nrow = 13, ncol = nfa) + + # Structure matrix (S_mat) from under-the-hood of the psych::principal() function + rot.mat <- fit_rotate$rotmat # rotation matrix + ui <- solve(rot.mat) + Phi <- cov2cor(ui %*% t(ui)) # interfactor correlation + S_mat <- P_mat %*% Phi # pattern matrix multiplied by interfactor correlation + } else { - ndi_vars <- ndi_vars %>% tidyr::separate(NAME, into = c("county", "state"), sep = ",") + P_mat <- matrix(fit_rotate, nrow = 13, ncol = 1) + Phi <- 1 + S_mat <- P_mat } - ndi_vars <- ndi_vars %>% - dplyr::mutate(MedHHInc = MedHHIncE, - PctRecvIDR = PctRecvIDR_numE / PctRecvIDR_denE * 100, - PctPubAsst = PctPubAsst_numE / PctPubAsst_denE * 100, - MedHomeVal = MedHomeValE, - PctMgmtBusScArti = PctMgmtBusScArti_numE / PctMgmtBusScArti_denE * 100, - PctFemHeadKids = (PctFemHeadKids_num1E + PctFemHeadKids_num2E) / PctFemHeadKids_denE * 100, - PctOwnerOcc = PctOwnerOccE, - PctNoPhone = PctNoPhoneE, - PctNComPlmb = PctNComPlmbE, - PctEducHSPlus = (PctEduc_num25upHSE + PctEduc_num25upSCE + PctEduc_num25upADE + - PctEduc_num25upBDE + PctEduc_num25upGDE) / PctEduc_den25upE * 100, - PctEducBchPlus = (PctEduc_num25upBDE + PctEduc_num25upGDE) / PctEduc_den25upE * 100, - PctFamBelowPov = PctFamBelowPovE, - PctUnempl = PctUnemplE, - TotalPop = TotalPopulationE) %>% - # Log transform median household income and median home value - # Reverse code percentages so that higher values represent more deprivation - # Round percentages to 1 decimal place - dplyr::mutate(logMedHHInc = log(MedHHInc), - logMedHomeVal = log(MedHomeVal), - PctNoIDR = 100 - PctRecvIDR, - PctWorkClass = 100 - PctMgmtBusScArti, - PctNotOwnerOcc = 100 - PctOwnerOcc, - PctEducLTHS = 100 - PctEducHSPlus, - PctEducLTBch = 100 - PctEducBchPlus) %>% - 
# Z-standardize the percentages - dplyr::mutate(PctNoIDRZ = scale(PctNoIDR), - PctPubAsstZ = scale(PctPubAsst), - PctWorkClassZ = scale(PctWorkClass), - PctFemHeadKidsZ = scale(PctFemHeadKids), - PctNotOwnerOccZ = scale(PctNotOwnerOcc), - PctNoPhoneZ = scale(PctNoPhone), - PctNComPlmbZ = scale(PctNComPlmb), - PctEducLTHSZ = scale(PctEducLTHS), - PctEducLTBchZ = scale(PctEducLTBch), - PctFamBelowPovZ = scale(PctFamBelowPov), - PctUnemplZ = scale(PctUnempl)) + ## Variable correlation matrix (R_mat) + R_mat <- as.matrix(cor(ndi_data_pca[complete.cases(ndi_data_pca),])) - # generate NDI - ndi_vars_pca <- ndi_vars %>% - dplyr::select(logMedHHInc, PctNoIDRZ, PctPubAsstZ, logMedHomeVal, PctWorkClassZ, - PctFemHeadKidsZ, PctNotOwnerOccZ, PctNoPhoneZ, PctNComPlmbZ, PctEducLTHSZ, - PctEducLTBchZ, PctFamBelowPovZ, PctUnemplZ) - } else { - # If inputing pre-formatted data: - colnames(df)[1:2] <- c("GEOID", "TotalPop") # rename first and second features (columns) with name to match above - ndi_vars <- dplyr::as_tibble(df) - ndi_vars_pca <- ndi_vars[ , -c(1:2)] # omits the first two features (columns) typically an ID (e.g., GEOID or FIPS) and TotalPop - } - # Run a factor analysis using Promax (oblique) rotation and a minimum Eigenvalue of 1 - nfa <- eigen(stats::cor(ndi_vars_pca, use = "complete.obs")) - nfa <- sum(nfa$values > 1) # count of factors with a minimum Eigenvalue of 1 - fit <- psych::principal(ndi_vars_pca, - nfactors = nfa, - rotate = "none") - fit_rotate <- stats::promax(stats::loadings(fit), m = 3) - - # Calculate the factors using only variables with an absolute loading score > 0.4 for the first factor - ## If number of factors > 2, use structure matrix, else pattern matrix - if (nfa > 1) { - P_mat <- matrix(stats::loadings(fit_rotate), nrow = 13, ncol = nfa) + ## standardized score coefficients or weight matrix (B_mat) + B_mat <- solve(R_mat, S_mat) - # Structure matrix (S_mat) from under-the-hood of the psych::principal() function - rot.mat <- 
fit_rotate$rotmat # rotation matrix - ui <- solve(rot.mat) - Phi <- cov2cor(ui %*% t(ui)) # interfactor correlation - S_mat <- P_mat %*% Phi # pattern matrix multiplied by interfactor correlation + # Additional PCA Information + fit_rotate$rotation <- 'promax' + fit_rotate$Phi <- Phi + fit_rotate$Structure <- S_mat - } else { - P_mat <- matrix(fit_rotate, nrow = 13, ncol = 1) - Phi <- 1 - S_mat <- P_mat - } - - ## Variable correlation matrix (R_mat) - R_mat <- as.matrix(cor(ndi_vars_pca[complete.cases(ndi_vars_pca), ])) - - ## standardized score coefficients or weight matrix (B_mat) - B_mat <- solve(R_mat, S_mat) - - # Additional PCA Information - fit_rotate$rotation <- "promax" - fit_rotate$Phi <- Phi - fit_rotate$Structure <- S_mat - - if (nfa > 1) { - fit_rotate$communality <- rowSums(P_mat^2) - } else { - fit_rotate$communality <- P_mat^2 - } - fit_rotate$uniqueness <- diag(R_mat) - fit_rotate$communality - - if (nfa > 1) { - vx <- colSums(P_mat^2) - } else { - vx <- sum(P_mat^2) - } - - vtotal <- sum(fit_rotate$communality + fit_rotate$uniqueness) - vx <- diag(Phi %*% t(P_mat) %*% P_mat) - names(vx) <- colnames(loadings) - varex <- rbind(`SS loadings` = vx) - varex <- rbind(varex, `Proportion Var` = vx/vtotal) - if (nfa > 1) { - varex <- rbind(varex, `Cumulative Var` = cumsum(vx/vtotal)) - varex <- rbind(varex, `Proportion Explained` = vx/sum(vx)) - varex <- rbind(varex, `Cumulative Proportion` = cumsum(vx/sum(vx))) - } - fit_rotate$Vaccounted <- varex - - if (imp == TRUE) { - ndi_vars_scrs <- as.matrix(ndi_vars_pca) - miss <- which(is.na(ndi_vars_scrs), arr.ind = TRUE) - item.med <- apply(ndi_vars_scrs, 2, stats::median, na.rm = TRUE) - ndi_vars_scrs[miss] <- item.med[miss[, 2]] - } else { - ndi_vars_scrs <- ndi_vars_pca - } - - scrs <- as.matrix(scale(ndi_vars_scrs[complete.cases(ndi_vars_scrs), abs(S_mat[ , 1]) > 0.4 ])) %*% B_mat[abs(S_mat[ , 1]) > 0.4, 1] - - ndi_vars_NA <- ndi_vars[complete.cases(ndi_vars_scrs), ] - ndi_vars_NA$NDI <- c(scrs) - - 
ndi_vars_NDI <- dplyr::left_join(ndi_vars[ , c("GEOID", "TotalPop")], ndi_vars_NA[ , c("GEOID", "NDI")], by = "GEOID") - - # Calculate Cronbach's alpha correlation coefficient among the factors and verify values are above 0.7. - if (nfa == 1) { - crnbch <- "Only one factor with minimum Eigenvalue of 1. Cannot calculate Cronbach's alpha." - } else { - cronbach <- suppressMessages(psych::alpha(ndi_vars_pca[ , abs(S_mat[ , 1]) > 0.4 ], check.keys = TRUE, na.rm = TRUE, warnings = FALSE)) - crnbch <- cronbach$total$std.alpha - } - - # Warning for missingness of census characteristics - missingYN <- ndi_vars_pca %>% - tidyr::pivot_longer(cols = dplyr::everything(), - names_to = "variable", - values_to = "val") %>% - dplyr::group_by(variable) %>% - dplyr::summarise(total = dplyr::n(), - n_missing = sum(is.na(val)), - percent_missing = paste0(round(mean(is.na(val)) * 100, 2), " %")) - - if (quiet == FALSE) { + if (nfa > 1) { + fit_rotate$communality <- rowSums(P_mat ^ 2) + } else { + fit_rotate$communality <- P_mat ^ 2 + } + fit_rotate$uniqueness <- diag(R_mat) - fit_rotate$communality - # Warning for missing census data - if (sum(missingYN$n_missing) > 0) { - message("Warning: Missing census data") + if (nfa > 1) { + vx <- colSums(P_mat ^ 2) + } else { + vx <- sum(P_mat ^ 2) } - # Warning for Cronbach's alpha < 0.7 - if (cronbach$total$std.alpha < 0.7) { - message("Warning: Cronbach's alpha correlation coefficient among the factors is less than 0.7.") + vtotal <- sum(fit_rotate$communality + fit_rotate$uniqueness) + vx <- diag(Phi %*% t(P_mat) %*% P_mat) + names(vx) <- colnames(loadings) + varex <- rbind(`SS loadings` = vx) + varex <- rbind(varex, `Proportion Var` = vx / vtotal) + if (nfa > 1) { + varex <- rbind(varex, `Cumulative Var` = cumsum(vx / vtotal)) + varex <- rbind(varex, `Proportion Explained` = vx / sum(vx)) + varex <- rbind(varex, `Cumulative Proportion` = cumsum(vx / sum(vx))) } + fit_rotate$Vaccounted <- varex - # Warning for proportion of variance 
explained by FA1 - if (fit_rotate$Vaccounted[2] < 0.50) { - message("Warning: The proportion of variance explained by PC1 is less than 0.50.") + if (imp == TRUE) { + ndi_data_scrs <- as.matrix(ndi_data_pca) + miss <- which(is.na(ndi_data_scrs), arr.ind = TRUE) + item.med <- apply(ndi_data_scrs, 2, stats::median, na.rm = TRUE) + ndi_data_scrs[miss] <- item.med[miss[, 2]] + } else { + ndi_data_scrs <- ndi_data_pca } - } - - # NDI quintiles weighted by tract population - NDIQuint <- ndi_vars_NDI %>% - dplyr::mutate(NDIQuint = cut(NDI*log(TotalPop), - breaks = stats::quantile(NDI*log(TotalPop), - probs = c(0, 0.2, 0.4, 0.6, 0.8, 1), - na.rm = TRUE), - labels = c("1-Least deprivation", "2-BelowAvg deprivation", - "3-Average deprivation","4-AboveAvg deprivation", - "5-Most deprivation"), - include.lowest = TRUE), - NDIQuint = factor(replace(as.character(NDIQuint), - is.na(NDIQuint) | is.infinite(NDIQuint), - "9-NDI not avail"), - c(levels(NDIQuint), "9-NDI not avail"))) %>% - dplyr::select(NDI, NDIQuint) - - if (is.null(df)) { - # Format output - if (round_output == TRUE) { - ndi <- cbind(ndi_vars, NDIQuint) %>% - dplyr::mutate(PctRecvIDR = round(PctRecvIDR, digits = 1), - PctPubAsst = round(PctPubAsst, digits = 1), - PctMgmtBusScArti = round(PctMgmtBusScArti, digits = 1), - PctFemHeadKids = round(PctFemHeadKids, digits = 1), - PctOwnerOcc = round(PctOwnerOcc, digits = 1), - PctNoPhone = round(PctNoPhone, digits = 1), - PctNComPlmb = round(PctNComPlmb, digits = 1), - PctEducHSPlus = round(PctEducHSPlus, digits = 1), - PctEducBchPlus = round(PctEducBchPlus, digits = 1), - PctFamBelowPov = round(PctFamBelowPov, digits = 1), - PctUnempl = round(PctUnempl, digits = 1)) + + scrs <- as.matrix( + scale(ndi_data_scrs[complete.cases(ndi_data_scrs), abs(S_mat[, 1]) > 0.4]) + ) %*% B_mat[abs(S_mat[, 1]) > 0.4, 1] + + ndi_data_NA <- ndi_data[complete.cases(ndi_data_scrs),] + ndi_data_NA$NDI <- c(scrs) + + ndi_data_NDI <- ndi_data[, c('GEOID', 'TotalPop')] %>% + 
dplyr::left_join(ndi_data_NA[, c('GEOID', 'NDI')], by = dplyr::join_by(GEOID)) + + # Calculate Cronbach's alpha correlation coefficient among the factors and verify values are above 0.7. + if (nfa == 1) { + crnbch <- + "Only one factor with minimum Eigenvalue of 1. Cannot calculate Cronbach's alpha." } else { - ndi <- cbind(ndi_vars, NDIQuint) + cronbach <- suppressMessages(psych::alpha( + ndi_data_pca[, abs(S_mat[, 1]) > 0.4], + check.keys = TRUE, + na.rm = TRUE, + warnings = FALSE + )) + crnbch <- cronbach$total$std.alpha } - if (geo == "tract") { + # Warning for missingness of census characteristics + missingYN <- ndi_data_pca %>% + tidyr::pivot_longer( + cols = dplyr::everything(), + names_to = 'variable', + values_to = 'val' + ) %>% + dplyr::group_by(variable) %>% + dplyr::summarise( + total = dplyr::n(), + n_missing = sum(is.na(val)), + percent_missing = paste0(round(mean(is.na(val)) * 100, 2), ' %') + ) + + if (quiet == FALSE) { + # Warning for missing census data + if (sum(missingYN$n_missing) > 0) { + message('Warning: Missing census data') + } + + # Warning for Cronbach's alpha < 0.7 + if (cronbach$total$std.alpha < 0.7) { + message( + "Warning: Cronbach's alpha correlation coefficient among the factors is less than 0.7." 
+ ) + } + + # Warning for proportion of variance explained by FA1 + if (fit_rotate$Vaccounted[2] < 0.50) { + message('Warning: The proportion of variance explained by PC1 is less than 0.50.') + } + } + + # NDI quintiles weighted by tract population + NDIQuint <- ndi_data_NDI %>% + dplyr::mutate( + NDIQuint = cut( + NDI * log(TotalPop), + breaks = stats::quantile( + NDI * log(TotalPop), + probs = c(0, 0.2, 0.4, 0.6, 0.8, 1), + na.rm = TRUE + ), + labels = c( + '1-Least deprivation', + '2-BelowAvg deprivation', + '3-Average deprivation', + '4-AboveAvg deprivation', + '5-Most deprivation' + ), + include.lowest = TRUE + ), + NDIQuint = factor( + replace( + as.character(NDIQuint), + is.na(NDIQuint) | + is.infinite(NDIQuint), + '9-NDI not avail' + ), + c(levels(NDIQuint), '9-NDI not avail') + ) + ) %>% + dplyr::select(NDI, NDIQuint) + + if (is.null(df)) { + # Format output + if (round_output == TRUE) { + ndi <- cbind(ndi_data, NDIQuint) %>% + dplyr::mutate( + PctRecvIDR = round(PctRecvIDR, digits = 1), + PctPubAsst = round(PctPubAsst, digits = 1), + PctMgmtBusScArti = round(PctMgmtBusScArti, digits = 1), + PctFemHeadKids = round(PctFemHeadKids, digits = 1), + PctOwnerOcc = round(PctOwnerOcc, digits = 1), + PctNoPhone = round(PctNoPhone, digits = 1), + PctNComPlmb = round(PctNComPlmb, digits = 1), + PctEducHSPlus = round(PctEducHSPlus, digits = 1), + PctEducBchPlus = round(PctEducBchPlus, digits = 1), + PctFamBelowPov = round(PctFamBelowPov, digits = 1), + PctUnempl = round(PctUnempl, digits = 1) + ) + } else { + ndi <- cbind(ndi_data, NDIQuint) + } + + if (geo == 'tract') { + ndi <- ndi %>% + dplyr::select( + GEOID, + state, + county, + tract, + NDI, + NDIQuint, + MedHHInc, + PctRecvIDR, + PctPubAsst, + MedHomeVal, + PctMgmtBusScArti, + PctFemHeadKids, + PctOwnerOcc, + PctNoPhone, + PctNComPlmb, + PctEducHSPlus, + PctEducBchPlus, + PctFamBelowPov, + PctUnempl, + TotalPop + ) + } else { + ndi <- ndi %>% + dplyr::select( + GEOID, + state, + county, + NDI, + NDIQuint, + 
MedHHInc, + PctRecvIDR, + PctPubAsst, + MedHomeVal, + PctMgmtBusScArti, + PctFemHeadKids, + PctOwnerOcc, + PctNoPhone, + PctNComPlmb, + PctEducHSPlus, + PctEducBchPlus, + PctFamBelowPov, + PctUnempl, + TotalPop + ) + } + ndi <- ndi %>% - dplyr::select(GEOID, - state, - county, - tract, - NDI, NDIQuint, - MedHHInc, PctRecvIDR, PctPubAsst, MedHomeVal, PctMgmtBusScArti, - PctFemHeadKids,PctOwnerOcc, PctNoPhone, PctNComPlmb, PctEducHSPlus, - PctEducBchPlus, PctFamBelowPov, PctUnempl, TotalPop) + dplyr::mutate( + state = stringr::str_trim(state), + county = stringr::str_trim(county) + ) %>% + dplyr::arrange(GEOID) %>% + dplyr::as_tibble() + } else { - ndi <- ndi %>% - dplyr::select(GEOID, - state, - county, - NDI, NDIQuint, - MedHHInc, PctRecvIDR, PctPubAsst, MedHomeVal, PctMgmtBusScArti, - PctFemHeadKids,PctOwnerOcc, PctNoPhone, PctNComPlmb, PctEducHSPlus, - PctEducBchPlus, PctFamBelowPov, PctUnempl, TotalPop) + ndi <- cbind(df[, 1], NDIQuint, df[, 2:ncol(df)]) + ndi <- dplyr::as_tibble(ndi[order(ndi[, 1]),]) } - ndi <- ndi %>% - dplyr::mutate(state = stringr::str_trim(state), - county = stringr::str_trim(county)) %>% - dplyr::arrange(GEOID) %>% - dplyr::as_tibble() + out <- list( + ndi = ndi, + pca = fit_rotate, + missing = missingYN, + cronbach = crnbch + ) - } else { - ndi <- cbind(df[ , 1], NDIQuint, df[ , 2:ncol(df)]) - ndi <- dplyr::as_tibble(ndi[order(ndi[ , 1]), ]) + return(out) } - - out <- list(ndi = ndi, - pca = fit_rotate, - missing = missingYN, - cronbach = crnbch) - - return(out) -} diff --git a/R/sudano.R b/R/sudano.R new file mode 100644 index 0000000..317d3b4 --- /dev/null +++ b/R/sudano.R @@ -0,0 +1,362 @@ +#' Location Quotient based on Merton (1938) and Sudano et al. (2013) +#' +#' Compute the aspatial Location Quotient (Sudano) of a selected racial/ethnic subgroup(s) and U.S. geographies. +#' +#' @param geo_large Character string specifying the larger geographical unit of the data. The default is counties \code{geo_large = 'county'}. 
+#' @param geo_small Character string specifying the smaller geographical unit of the data. The default is census tracts \code{geo_small = 'tract'}. +#' @param year Numeric. The year to compute the estimate. The default is 2020, and the years 2009 onward are currently available. +#' @param subgroup Character string specifying the racial/ethnic subgroup(s). See Details for available choices. +#' @param omit_NAs Logical. If FALSE, will compute index for a larger geographical unit only if all of its smaller geographical units have values. The default is TRUE. +#' @param quiet Logical. If TRUE, will not display messages about potential missing census information. The default is FALSE. +#' @param ... Arguments passed to \code{\link[tidycensus]{get_acs}} to select state, county, and other arguments for census characteristics +#' +#' @details This function will compute the aspatial Location Quotient (\emph{LQ}) of selected racial/ethnic subgroups and U.S. geographies for a specified geographical extent (e.g., the entire U.S. or a single state) based on Merton (1938) \doi{10.2307/2084686} and Sudano et al. (2013) \doi{10.1016/j.healthplace.2012.09.015}. This function provides the computation of \emph{LQ} for any of the U.S. Census Bureau race/ethnicity subgroups (including Hispanic and non-Hispanic individuals). +#' +#' The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the aspatial computation. The yearly estimates are available for 2009 onward when ACS-5 data are available (2010 onward for \code{geo_large = 'cbsa'} and 2011 onward for \code{geo_large = 'csa'} or \code{geo_large = 'metro'}) but may be available from other U.S. Census Bureau surveys. The twenty racial/ethnic subgroups (U.S.
Census Bureau definitions) are: +#' \itemize{ +#' \item \strong{B03002_002}: not Hispanic or Latino \code{'NHoL'} +#' \item \strong{B03002_003}: not Hispanic or Latino, white alone \code{'NHoLW'} +#' \item \strong{B03002_004}: not Hispanic or Latino, Black or African American alone \code{'NHoLB'} +#' \item \strong{B03002_005}: not Hispanic or Latino, American Indian and Alaska Native alone \code{'NHoLAIAN'} +#' \item \strong{B03002_006}: not Hispanic or Latino, Asian alone \code{'NHoLA'} +#' \item \strong{B03002_007}: not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'NHoLNHOPI'} +#' \item \strong{B03002_008}: not Hispanic or Latino, Some other race alone \code{'NHoLSOR'} +#' \item \strong{B03002_009}: not Hispanic or Latino, Two or more races \code{'NHoLTOMR'} +#' \item \strong{B03002_010}: not Hispanic or Latino, Two races including Some other race \code{'NHoLTRiSOR'} +#' \item \strong{B03002_011}: not Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'NHoLTReSOR'} +#' \item \strong{B03002_012}: Hispanic or Latino \code{'HoL'} +#' \item \strong{B03002_013}: Hispanic or Latino, white alone \code{'HoLW'} +#' \item \strong{B03002_014}: Hispanic or Latino, Black or African American alone \code{'HoLB'} +#' \item \strong{B03002_015}: Hispanic or Latino, American Indian and Alaska Native alone \code{'HoLAIAN'} +#' \item \strong{B03002_016}: Hispanic or Latino, Asian alone \code{'HoLA'} +#' \item \strong{B03002_017}: Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'HoLNHOPI'} +#' \item \strong{B03002_018}: Hispanic or Latino, Some other race alone \code{'HoLSOR'} +#' \item \strong{B03002_019}: Hispanic or Latino, Two or more races \code{'HoLTOMR'} +#' \item \strong{B03002_020}: Hispanic or Latino, Two races including Some other race \code{'HoLTRiSOR'} +#' \item \strong{B03002_021}: Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'HoLTReSOR'} 
+#' } +#' +#' Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify the geographic extent of the data output. +#' +#' \emph{LQ} is a measure of relative racial homogeneity of each smaller geography within a larger geography. \emph{LQ} can range in value from 0 to infinity because it is a ratio of two proportions in which the numerator is the proportion of subgroup population in a smaller geography and the denominator is the proportion of subgroup population in its larger geography. For example, a smaller geography with an \emph{LQ} of 5 means that the proportion of the subgroup population living in the smaller geography is five times the proportion of the subgroup population in its larger geography. +#' +#' Larger geographies available include state \code{geo_large = 'state'}, county \code{geo_large = 'county'}, census tract \code{geo_large = 'tract'}, Core Based Statistical Area \code{geo_large = 'cbsa'}, Combined Statistical Area \code{geo_large = 'csa'}, and Metropolitan Division \code{geo_large = 'metro'} levels. Smaller geographies available include county \code{geo_small = 'county'}, census tract \code{geo_small = 'tract'}, and census block group \code{geo_small = 'block group'} levels. If a larger geographical area is comprised of only one smaller geographical area (e.g., a U.S. county contains only one census tract), then the \emph{LQ} value returned is NA.
If the larger geographical unit is Combined Statistical Areas \code{geo_large = 'csa'} or Core Based Statistical Areas \code{geo_large = 'cbsa'}, only the smaller geographical units completely within a larger geographical unit are considered in the \emph{LQ} computation (see internal \code{\link[sf]{st_within}} function for more information). We recommend specifying all states within which the larger geographical unit of interest is located using the internal \code{state} argument to ensure all appropriate smaller geographical units are included in the \emph{LQ} computation. +#' +#' @return An object of class 'list'. This is a named list with the following components: +#' +#' \describe{ +#' \item{\code{lq}}{An object of class 'tbl' for the GEOID, name, and \emph{LQ} at specified smaller census geographies.} +#' \item{\code{lq_data}}{An object of class 'tbl' for the raw census values at specified smaller census geographies.} +#' \item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute \emph{LQ}.} +#' } +#' +#' @import dplyr +#' @importFrom sf st_drop_geometry st_within +#' @importFrom stats complete.cases +#' @importFrom tidycensus get_acs +#' @importFrom tidyr pivot_longer separate +#' @importFrom tigris combined_statistical_areas core_based_statistical_areas metro_divisions +#' @importFrom utils stack +#' @export +#' +#' @seealso \code{\link[tidycensus]{get_acs}} for additional arguments for geographic extent selection (i.e., \code{state} and \code{county}). +#' +#' @examples +#' \dontrun{ +#' # Wrapped in \dontrun{} because these examples require a Census API key.
+#' +#' # Location Quotient (a measure of relative homogeneity) of Black populations +#' ## of census tracts within counties within Georgia, U.S.A., counties (2020) +#' sudano( +#' geo_large = 'county', +#' geo_small = 'tract', +#' state = 'GA', +#' year = 2020, +#' subgroup = c('NHoLB', 'HoLB') +#' ) +#' +#' } +#' +sudano <- function(geo_large = 'county', + geo_small = 'tract', + year = 2020, + subgroup, + omit_NAs = TRUE, + quiet = FALSE, + ...) { + + # Check arguments + match.arg(geo_large, choices = c('state', 'county', 'tract', 'cbsa', 'csa', 'metro')) + match.arg(geo_small, choices = c('county', 'tract', 'block group')) + stopifnot(is.numeric(year), year >= 2009) # all variables available 2009 onward + match.arg( + subgroup, + several.ok = TRUE, + choices = c( + 'NHoL', + 'NHoLW', + 'NHoLB', + 'NHoLAIAN', + 'NHoLA', + 'NHoLNHOPI', + 'NHoLSOR', + 'NHoLTOMR', + 'NHoLTRiSOR', + 'NHoLTReSOR', + 'HoL', + 'HoLW', + 'HoLB', + 'HoLAIAN', + 'HoLA', + 'HoLNHOPI', + 'HoLSOR', + 'HoLTOMR', + 'HoLTRiSOR', + 'HoLTReSOR' + ) + ) + + # Select census variables + vars <- c( + TotalPop = 'B03002_001', + NHoL = 'B03002_002', + NHoLW = 'B03002_003', + NHoLB = 'B03002_004', + NHoLAIAN = 'B03002_005', + NHoLA = 'B03002_006', + NHoLNHOPI = 'B03002_007', + NHoLSOR = 'B03002_008', + NHoLTOMR = 'B03002_009', + NHoLTRiSOR = 'B03002_010', + NHoLTReSOR = 'B03002_011', + HoL = 'B03002_012', + HoLW = 'B03002_013', + HoLB = 'B03002_014', + HoLAIAN = 'B03002_015', + HoLA = 'B03002_016', + HoLNHOPI = 'B03002_017', + HoLSOR = 'B03002_018', + HoLTOMR = 'B03002_019', + HoLTRiSOR = 'B03002_020', + HoLTReSOR = 'B03002_021' + ) + + selected_vars <- vars[c('TotalPop', subgroup)] + out_names <- names(selected_vars) # save for output + in_subgroup <- paste0(subgroup, 'E') + + # Acquire LQ variables and sf geometries + out_dat <- suppressMessages(suppressWarnings( + tidycensus::get_acs( + geography = geo_small, + year = year, + output = 'wide', + variables = selected_vars, + geometry = TRUE, + 
keep_geo_vars = TRUE, + ... + ) + )) + + # Format output + if (geo_small == 'county') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('county', 'state'), sep = ',') + } + if (geo_small == 'tract') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('tract', 'county', 'state'), sep = ',') %>% + dplyr::mutate(tract = gsub('[^0-9\\.]', '', tract)) + } + if (geo_small == 'block group') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('block.group', 'tract', 'county', 'state'), sep = ',') %>% + dplyr::mutate( + tract = gsub('[^0-9\\.]', '', tract), + block.group = gsub('[^0-9\\.]', '', block.group) + ) + } + + # Grouping IDs for R computation + if (geo_large == 'state') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = STATEFP, + state = stringr::str_trim(state) + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'tract') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = paste0(STATEFP, COUNTYFP, TRACTCE), + state = stringr::str_trim(state), + county = stringr::str_trim(county) + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'county') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = paste0(STATEFP, COUNTYFP), + state = stringr::str_trim(state), + county = stringr::str_trim(county) + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'cbsa') { + stopifnot(is.numeric(year), year >= 2010) # CBSAs only available 2010 onward + lgeom <- suppressMessages(suppressWarnings(tigris::core_based_statistical_areas(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 3] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + cbsa = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 4] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'csa') { + 
stopifnot(is.numeric(year), year >= 2011) # CSAs only available 2011 onward + lgeom <- suppressMessages(suppressWarnings(tigris::combined_statistical_areas(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 2] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + csa = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 3] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'metro') { + stopifnot(is.numeric(year), year >= 2011) # Metro Divisions only available 2011 onward + lgeom <- suppressMessages(suppressWarnings(tigris::metro_divisions(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 4] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + metro = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 5] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) %>% + sf::st_drop_geometry() + } + + # Count of racial/ethnic subgroup populations + ## Count of racial/ethnic comparison subgroup population + if (length(in_subgroup) == 1) { + out_dat <- out_dat %>% + dplyr::mutate(subgroup = .[, in_subgroup]) + } else { + out_dat <- out_dat %>% + dplyr::mutate(subgroup = rowSums(.[, in_subgroup])) + } + + # Compute LQ + ## From Sudano (2013) https://doi.org/10.1016/j.healthplace.2012.09.015 + ## LQ_{im} = (x_{im}/X_{i})/(X_{m}/X) + ## for: + ## i smaller geography and subgroup m + + ## Compute + out_tmp <- out_dat %>% + split(., f = list(out_dat$oid)) %>% + lapply(., FUN = lq_fun, omit_NAs = omit_NAs) %>% + do.call('rbind', .) 
+ + # Warning for missingness of census characteristics + missingYN <- out_dat[, c('TotalPopE', in_subgroup)] + names(missingYN) <- out_names + missingYN <- missingYN %>% + tidyr::pivot_longer( + cols = dplyr::everything(), + names_to = 'variable', + values_to = 'val' + ) %>% + dplyr::group_by(variable) %>% + dplyr::summarise( + total = dplyr::n(), + n_missing = sum(is.na(val)), + percent_missing = paste0(round(mean(is.na(val)) * 100, 2), ' %') + ) + + if (quiet == FALSE) { + # Warning for missing census data + if (sum(missingYN$n_missing) > 0) { + message('Warning: Missing census data') + } + } + + # Format output + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(GEOID)) + + if (geo_small == 'state') { + out <- out %>% + dplyr::select(GEOID, state, LQ) + } + if (geo_small == 'county') { + out <- out %>% + dplyr::select(GEOID, state, county, LQ) + } + if (geo_small == 'tract') { + out <- out %>% + dplyr::select(GEOID, state, county, tract, LQ) + } + if (geo_small == 'block group') { + out <- out %>% + dplyr::select(GEOID, state, county, tract, block.group, LQ) + } + if (geo_large == 'cbsa') { + out <- out_dat %>% + dplyr::select(GEOID, cbsa) %>% + dplyr::left_join(out, ., by = dplyr::join_by(GEOID)) %>% + dplyr::relocate(cbsa, .after = county) + } + if (geo_large == 'csa') { + out <- out_dat %>% + dplyr::select(GEOID, csa) %>% + dplyr::left_join(out, ., by = dplyr::join_by(GEOID)) %>% + dplyr::relocate(csa, .after = county) + } + if (geo_large == 'metro') { + out <- out_dat %>% + dplyr::select(GEOID, metro) %>% + dplyr::left_join(out, ., by = dplyr::join_by(GEOID)) %>% + dplyr::relocate(metro, .after = county) + } + + out <- out %>% + unique(.) 
%>% + .[.$GEOID != 'NANA',] %>% + dplyr::arrange(GEOID) %>% + dplyr::as_tibble() + + out_dat <- out_dat %>% + dplyr::arrange(GEOID) %>% + dplyr::as_tibble() + + out <- list(lq = out, lq_data = out_dat, missing = missingYN) + + return(out) +} diff --git a/R/utils.R b/R/utils.R index e51311f..c52e8ca 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,19 +1,24 @@ -# Internal function for Dissimilarity Index (Duncan & Duncan 1955) +# Internal function for the Dissimilarity Index (Duncan & Duncan 1955) ## Returns NA value if only one smaller geography in a larger geography -di_fun <- function(x, omit_NAs) { - xx <- x[ , c("subgroup", "subgroup_ref")] +d_fun <- function(x, omit_NAs) { + xx <- x[ , c('subgroup', 'subgroup_ref')] if (omit_NAs == TRUE) { xx <- xx[stats::complete.cases(xx), ] } if (nrow(x) < 2 || any(xx < 0) || any(is.na(xx))) { NA } else { - 1/2 * sum(abs(xx$subgroup / sum(xx$subgroup, na.rm = TRUE) - xx$subgroup_ref / sum(xx$subgroup_ref, na.rm = TRUE))) + x_i <- xx$subgroup + n_i <- sum(xx$subgroup, na.rm = TRUE) + y_i <- xx$subgroup_ref + m_i <- sum(xx$subgroup_ref, na.rm = TRUE) + D <- 0.5 * sum(abs((x_i/n_i) - (y_i/m_i)), na.rm = TRUE) + return(D) } } -# Internal function for Atkinson Index (Atkinson 1970) +# Internal function for the Atkinson Index (Atkinson 1970) ## Returns NA value if only one smaller geography in a larger geography ## If denoting the Hölder mean -ai_fun <- function(x, epsilon, omit_NAs) { +a_fun <- function(x, epsilon, omit_NAs) { if (omit_NAs == TRUE) { xx <- stats::na.omit(x$subgroup) } else { @@ -30,3 +35,139 @@ ai_fun <- function(x, epsilon, omit_NAs) { } } } + +# Internal function for the aspatial Interaction Index (Bell 1954) +## Returns NA value if only one smaller geography in a larger geography +xpy_star_fun <- function(x, omit_NAs) { + xx <- x[ , c('TotalPopE', 'subgroup', 'subgroup_ixn')] + if (omit_NAs == TRUE) { xx <- xx[stats::complete.cases(xx), ] } + if (nrow(x) < 2 || any(xx < 0) || any(is.na(xx))) { + NA + } else { + 
x_i <- xx$subgroup + X <- sum(xx$subgroup, na.rm = TRUE) + y_i <- xx$subgroup_ixn + t_i <- xx$TotalPopE + xPy_star <- sum((x_i / X) * (y_i / t_i), na.rm = TRUE) + return(xPy_star) + } +} + +# Internal function for the aspatial Isolation Index (Lieberson 1981) +## Returns NA value if only one smaller geography in a larger geography +xpx_star_fun <- function(x, omit_NAs) { + xx <- x[ , c('TotalPopE', 'subgroup')] + if (omit_NAs == TRUE) { xx <- xx[stats::complete.cases(xx), ] } + if (nrow(x) < 2 || any(xx < 0) || any(is.na(xx))) { + NA + } else { + x_i <- xx$subgroup + X <- sum(xx$subgroup, na.rm = TRUE) + t_i <- xx$TotalPopE + xPx_star <- sum((x_i / X) * (x_i / t_i), na.rm = TRUE) + return(xPx_star) + } +} + +# Internal function for the aspatial Correlation Ratio (White 1986) +## Returns NA value if only one smaller geography in a larger geography +v_fun <- function(x, omit_NAs) { + xx <- x[ , c('TotalPopE', 'subgroup')] + if (omit_NAs == TRUE) { xx <- xx[stats::complete.cases(xx), ] } + if (nrow(x) < 2 || any(xx < 0) || any(is.na(xx))) { + NA + } else { + x_i <- xx$subgroup + X <- sum(xx$subgroup, na.rm = TRUE) + t_i <- xx$TotalPopE + N <- sum(xx$TotalPopE, na.rm = TRUE) + xPx_star <- sum((x_i / X) * (x_i / t_i), na.rm = TRUE) + P <- X / N + V <- (xPx_star - P) / (1 - P) + return(V) + } +} + +# Internal function for the aspatial Location Quotient (Sudano et al. 
2013) +## Returns NA value if only one smaller geography in a larger geography +lq_fun <- function(x, omit_NAs) { + xx <- x[ , c('TotalPopE', 'subgroup', 'GEOID')] + if (omit_NAs == TRUE) { xx <- xx[stats::complete.cases(xx), ] } + if (nrow(x) < 2 || any(xx < 0) || any(is.na(xx))) { + NA + } else { + x_i <- xx$subgroup # x_im + t_i <- xx$TotalPopE # X_i + p_i <- x_i / t_i # p_im + X <- sum(xx$subgroup, na.rm = TRUE) # X_m + N <- sum(xx$TotalPopE, na.rm = TRUE) # X + if (anyNA(p_i)) { p_i[is.na(p_i)] <- 0 } + LQ <- p_i / (X / N) # (x_im/X_i)/(X_m/X) + df <- data.frame(LQ = LQ, GEOID = xx$GEOID) + return(df) + } +} + + + +# Internal function for the aspatial Local Exposure & Isolation (Bemanian & Beyer 2017) metric +## Returns NA value if only one smaller geography in a larger geography +lexis_fun <- function(x, omit_NAs) { + xx <- x[ , c('TotalPopE', 'subgroup', 'subgroup_ixn', 'GEOID')] + if (omit_NAs == TRUE) { xx <- xx[stats::complete.cases(xx), ] } + if (nrow(x) < 2 || any(xx < 0) || any(is.na(xx))) { + NA + } else { + p_im <- xx$subgroup / xx$TotalPopE + if (anyNA(p_im)) { p_im[is.na(p_im)] <- 0 } + p_in <- xx$subgroup_ixn / xx$TotalPopE + if (anyNA(p_in)) { p_in[is.na(p_in) ] <- 0 } + P_m <- sum(xx$subgroup, na.rm = TRUE) / sum(xx$TotalPopE, na.rm = TRUE) + P_n <- sum(xx$subgroup_ixn, na.rm = TRUE) / sum(xx$TotalPopE, na.rm = TRUE) + LExIs <- car::logit(p_im * p_in) - car::logit(P_m * P_n) + df <- data.frame(LExIs = LExIs, GEOID = xx$GEOID) + return(df) + } +} + +# Internal function for the aspatial Delta (Hoover 1941) +## Returns NA value if only one smaller geography in a larger geography +del_fun <- function(x, omit_NAs) { + xx <- x[ , c('subgroup', 'ALAND')] + if (omit_NAs == TRUE) { xx <- xx[stats::complete.cases(xx), ] } + if (nrow(x) < 2 || any(xx < 0) || any(is.na(xx))) { + NA + } else { + x_i <- xx$subgroup + X <- sum(xx$subgroup, na.rm = TRUE) + a_i <- xx$ALAND + A <- sum(xx$ALAND, na.rm = TRUE) + DEL <- 0.5 * sum(abs((x_i / X) - (a_i / A)), na.rm = 
TRUE) + return(DEL) + } +} + +# Internal function for an index of spatial proximity (White 1986) +## Returns NA value if only one smaller geography in a larger geography +sp_fun <- function(x, omit_NAs) { + xx <- x[ , c('TotalPopE', 'subgroup', 'subgroup_ref', 'ALAND')] + if (omit_NAs == TRUE) { xx <- xx[stats::complete.cases(sf::st_drop_geometry(xx)), ] } + if (nrow(sf::st_drop_geometry(x)) < 2 || any(sf::st_drop_geometry(xx) < 0) || any(is.na(sf::st_drop_geometry(xx)))) { + NA + } else { + d_ij <- suppressWarnings(sf::st_distance(sf::st_centroid(xx), sf::st_centroid(xx))) + diag(d_ij) <- sqrt(0.6 * xx$ALAND) + c_ij <- -d_ij %>% + units::set_units(value = km) %>% + units::drop_units() %>% + exp() + X <- sum(xx$subgroup, na.rm = TRUE) + Y <- sum(xx$subgroup_ref, na.rm = TRUE) + N <- sum(xx$TotalPopE, na.rm = TRUE) + P_xx <- sum((xx$subgroup * xx$subgroup * c_ij) / X^2, na.rm = TRUE) + P_xy <- sum((xx$subgroup * xx$subgroup_ref * c_ij) / (X * Y), na.rm = TRUE) + P_tt <- sum((xx$TotalPopE * xx$TotalPopE * c_ij) / N^2, na.rm = TRUE) + SP <- ((X * P_xx) + (Y * P_xy)) / (N * P_tt) + return(SP) + } +} diff --git a/R/white.R b/R/white.R new file mode 100644 index 0000000..fdd32f0 --- /dev/null +++ b/R/white.R @@ -0,0 +1,382 @@ +#' Correlation Ratio based on Bell (1954) and White (1986) +#' +#' Compute the aspatial Correlation Ratio (White) of a selected racial/ethnic subgroup(s) and U.S. geographies. +#' +#' @param geo_large Character string specifying the larger geographical unit of the data. The default is counties \code{geo_large = 'county'}. +#' @param geo_small Character string specifying the smaller geographical unit of the data. The default is census tracts \code{geo_large = 'tract'}. +#' @param year Numeric. The year to compute the estimate. The default is 2020, and the years 2009 onward are currently available. +#' @param subgroup Character string specifying the racial/ethnic subgroup(s). See Details for available choices. +#' @param omit_NAs Logical. 
If FALSE, will compute index for a larger geographical unit only if all of its smaller geographical units have values. The default is TRUE. +#' @param quiet Logical. If TRUE, will not display messages about potential missing census information. The default is FALSE. +#' @param ... Arguments passed to \code{\link[tidycensus]{get_acs}} to select state, county, and other arguments for census characteristics +#' +#' @details This function will compute the aspatial Correlation Ratio (\emph{V} or \eqn{Eta^{2}}{Eta^2}) of selected racial/ethnic subgroups and U.S. geographies for a specified geographical extent (e.g., the entire U.S. or a single state) based on Bell (1954) \doi{10.2307/2574118} and White (1986) \doi{10.2307/3644339}. This function provides the computation of \emph{V} for any of the U.S. Census Bureau race/ethnicity subgroups (including Hispanic and non-Hispanic individuals). +#' +#' The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the aspatial computation. The yearly estimates are available for 2009 onward when ACS-5 data are available (2010 onward for \code{geo_large = 'cbsa'} and 2011 onward for \code{geo_large = 'csa'} or \code{geo_large = 'metro'}) but may be available from other U.S. Census Bureau surveys. The twenty racial/ethnic subgroups (U.S.
Census Bureau definitions) are: +#' \itemize{ +#' \item \strong{B03002_002}: not Hispanic or Latino \code{'NHoL'} +#' \item \strong{B03002_003}: not Hispanic or Latino, white alone \code{'NHoLW'} +#' \item \strong{B03002_004}: not Hispanic or Latino, Black or African American alone \code{'NHoLB'} +#' \item \strong{B03002_005}: not Hispanic or Latino, American Indian and Alaska Native alone \code{'NHoLAIAN'} +#' \item \strong{B03002_006}: not Hispanic or Latino, Asian alone \code{'NHoLA'} +#' \item \strong{B03002_007}: not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'NHoLNHOPI'} +#' \item \strong{B03002_008}: not Hispanic or Latino, Some other race alone \code{'NHoLSOR'} +#' \item \strong{B03002_009}: not Hispanic or Latino, Two or more races \code{'NHoLTOMR'} +#' \item \strong{B03002_010}: not Hispanic or Latino, Two races including Some other race \code{'NHoLTRiSOR'} +#' \item \strong{B03002_011}: not Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'NHoLTReSOR'} +#' \item \strong{B03002_012}: Hispanic or Latino \code{'HoL'} +#' \item \strong{B03002_013}: Hispanic or Latino, white alone \code{'HoLW'} +#' \item \strong{B03002_014}: Hispanic or Latino, Black or African American alone \code{'HoLB'} +#' \item \strong{B03002_015}: Hispanic or Latino, American Indian and Alaska Native alone \code{'HoLAIAN'} +#' \item \strong{B03002_016}: Hispanic or Latino, Asian alone \code{'HoLA'} +#' \item \strong{B03002_017}: Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'HoLNHOPI'} +#' \item \strong{B03002_018}: Hispanic or Latino, Some other race alone \code{'HoLSOR'} +#' \item \strong{B03002_019}: Hispanic or Latino, Two or more races \code{'HoLTOMR'} +#' \item \strong{B03002_020}: Hispanic or Latino, Two races including Some other race \code{'HoLTRiSOR'} +#' \item \strong{B03002_021}: Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'HoLTReSOR'} 
+#' } +#' +#' Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. +#' +#' \emph{V} removes the asymmetry from the Isolation Index (Bell) by controlling for the effect of population composition. The Isolation Index (Bell) is a measure of the probability that a member of one subgroup(s) will meet or interact with a member of another subgroup(s) with higher values signifying higher probability of interaction (less isolation). \emph{V} can range in value from 0 to Inf. +#' +#' Larger geographies available include state \code{geo_large = 'state'}, county \code{geo_large = 'county'}, census tract \code{geo_large = 'tract'}, Core Based Statistical Area \code{geo_large = 'cbsa'}, Combined Statistical Area \code{geo_large = 'csa'}, and Metropolitan Division \code{geo_large = 'metro'} levels. Smaller geographies available include county \code{geo_small = 'county'}, census tract \code{geo_small = 'tract'}, and census block group \code{geo_small = 'block group'} levels. If a larger geographical area is comprised of only one smaller geographical area (e.g., a U.S. county contains only one census tract), then the \emph{V} value returned is NA. If the larger geographical unit is Combined Statistical Areas \code{geo_large = 'csa'} or Core Based Statistical Areas \code{geo_large = 'cbsa'}, only the smaller geographical units completely within a larger geographical unit are considered in the \emph{V} computation (see internal \code{\link[sf]{st_within}} function for more information). We recommend specifying all states within which the larger geographical unit of interest is located using the internal \code{state} argument to ensure all appropriate smaller geographical units are included in the \emph{V} computation. +#' +#' @return An object of class 'list'.
This is a named list with the following components: +#' +#' \describe{ +#' \item{\code{v}}{An object of class 'tbl' for the GEOID, name, and \emph{V} at specified larger census geographies.} +#' \item{\code{v_data}}{An object of class 'tbl' for the raw census values at specified smaller census geographies.} +#' \item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute \emph{V}.} +#' } +#' +#' @import dplyr +#' @importFrom sf st_drop_geometry st_within +#' @importFrom stats complete.cases +#' @importFrom tidycensus get_acs +#' @importFrom tidyr pivot_longer separate +#' @importFrom tigris combined_statistical_areas core_based_statistical_areas metro_divisions +#' @importFrom utils stack +#' @export +#' +#' @seealso \code{\link[tidycensus]{get_acs}} for additional arguments for geographic extent selection (i.e., \code{state} and \code{county}). +#' +#' @examples +#' \dontrun{ +#' # Wrapped in \dontrun{} because these examples require a Census API key. +#' +#' # Correlation Ratio (a measure of isolation) of Black populations +#' ## in census tracts within counties of Georgia, U.S.A. (2020) +#' white( +#' geo_large = 'county', +#' geo_small = 'tract', +#' state = 'GA', +#' year = 2020, +#' subgroup = c('NHoLB', 'HoLB') +#' ) +#' +#' } +#' +white <- function(geo_large = 'county', + geo_small = 'tract', + year = 2020, + subgroup, + omit_NAs = TRUE, + quiet = FALSE, + ...)
{ + + # Check arguments (match.arg() and stopifnot() raise an error on invalid input; their return values are not used) + match.arg(geo_large, choices = c('state', 'county', 'tract', 'cbsa', 'csa', 'metro')) + match.arg(geo_small, choices = c('county', 'tract', 'block group')) + stopifnot(is.numeric(year), year >= 2009) # all variables available 2009 onward + match.arg( + subgroup, + several.ok = TRUE, + choices = c( + 'NHoL', + 'NHoLW', + 'NHoLB', + 'NHoLAIAN', + 'NHoLA', + 'NHoLNHOPI', + 'NHoLSOR', + 'NHoLTOMR', + 'NHoLTRiSOR', + 'NHoLTReSOR', + 'HoL', + 'HoLW', + 'HoLB', + 'HoLAIAN', + 'HoLA', + 'HoLNHOPI', + 'HoLSOR', + 'HoLTOMR', + 'HoLTRiSOR', + 'HoLTReSOR' + ) + ) + + # Select census variables + vars <- c( + TotalPop = 'B03002_001', + NHoL = 'B03002_002', + NHoLW = 'B03002_003', + NHoLB = 'B03002_004', + NHoLAIAN = 'B03002_005', + NHoLA = 'B03002_006', + NHoLNHOPI = 'B03002_007', + NHoLSOR = 'B03002_008', + NHoLTOMR = 'B03002_009', + NHoLTRiSOR = 'B03002_010', + NHoLTReSOR = 'B03002_011', + HoL = 'B03002_012', + HoLW = 'B03002_013', + HoLB = 'B03002_014', + HoLAIAN = 'B03002_015', + HoLA = 'B03002_016', + HoLNHOPI = 'B03002_017', + HoLSOR = 'B03002_018', + HoLTOMR = 'B03002_019', + HoLTRiSOR = 'B03002_020', + HoLTReSOR = 'B03002_021' + ) + + selected_vars <- vars[c('TotalPop', subgroup)] + out_names <- names(selected_vars) # save for output + in_subgroup <- paste0(subgroup, 'E') # column names of the selected subgroups as used below ('E' suffix, cf. 'TotalPopE') + + # Acquire V variables and sf geometries + out_dat <- suppressMessages(suppressWarnings( + tidycensus::get_acs( + geography = geo_small, + year = year, + output = 'wide', + variables = selected_vars, + geometry = TRUE, + keep_geo_vars = TRUE, + ...
+ ) + )) + + # Format output (split NAME.y into one name column per geography level) + if (geo_small == 'county') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('county', 'state'), sep = ',') + } + if (geo_small == 'tract') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('tract', 'county', 'state'), sep = ',') %>% + dplyr::mutate(tract = gsub('[^0-9\\.]', '', tract)) + } + if (geo_small == 'block group') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('block.group', 'tract', 'county', 'state'), sep = ',') %>% + dplyr::mutate( + tract = gsub('[^0-9\\.]', '', tract), block.group = gsub('[^0-9\\.]', '', block.group) + ) + } + + # Grouping IDs (oid) for V computation + if (geo_large == 'state') { + out_dat <- out_dat %>% + dplyr::mutate(oid = STATEFP, state = stringr::str_trim(state)) %>% + sf::st_drop_geometry() + } + if (geo_large == 'tract') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = paste0(STATEFP, COUNTYFP, TRACTCE), + state = stringr::str_trim(state), + county = stringr::str_trim(county) + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'county') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = paste0(STATEFP, COUNTYFP), + state = stringr::str_trim(state), + county = stringr::str_trim(county) + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'cbsa') { + stopifnot(is.numeric(year), year >= 2010) # CBSAs only available 2010 onward; NOTE(review): oid and cbsa below are read from lgeom by column position (3 and 4), presumably GEOID and NAME -- confirm against the tigris output + lgeom <- suppressMessages(suppressWarnings(tigris::core_based_statistical_areas(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 3] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + cbsa = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 4] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'csa') { + stopifnot(is.numeric(year), year >= 2011) # CSAs only
available 2011 onward + lgeom <- suppressMessages(suppressWarnings(tigris::combined_statistical_areas(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 2] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + csa = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 3] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) %>% + sf::st_drop_geometry() + } + if (geo_large == 'metro') { + stopifnot(is.numeric(year), year >= 2011) # Metro Divisions only available 2011 onward + lgeom <- suppressMessages(suppressWarnings(tigris::metro_divisions(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 4] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + metro = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 5] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) %>% + sf::st_drop_geometry() + } + + # Count of racial/ethnic subgroup populations + ## Count of racial/ethnic comparison subgroup population (row-wise sum when several subgroups are selected) + if (length(in_subgroup) == 1) { + out_dat <- out_dat %>% + dplyr::mutate(subgroup = .[, in_subgroup]) + } else { + out_dat <- out_dat %>% + dplyr::mutate(subgroup = rowSums(.[, in_subgroup])) + } + + # Compute V or \mathit{Eta}^{2} + ## From White (1986) https://doi.org/10.2307/3644339 + ## V = \mathit{Eta}^2 = [(_{x}P_{x}^* - P) / (1 - P)] + ## Where: + ## _{x}P_{x}^* denotes the Isolation Index (Bell) of subgroup x + ## P denotes the proportion of subgroup x of study (reference) area + + ## Compute (split by oid, apply v_fun to each larger geography, stack into a V/oid table) + out_tmp <- out_dat %>% + split(., f = list(out_dat$oid)) %>% + lapply(., FUN = v_fun, omit_NAs = omit_NAs) %>% + utils::stack(.)
%>% + dplyr::mutate(V = values, oid = ind) %>% + dplyr::select(V, oid) + + # Tabulate missingness of census characteristics (count and percent per variable) + missingYN <- out_dat[, c('TotalPopE', in_subgroup)] + names(missingYN) <- out_names + missingYN <- missingYN %>% + tidyr::pivot_longer( + cols = dplyr::everything(), + names_to = 'variable', + values_to = 'val' + ) %>% + dplyr::group_by(variable) %>% + dplyr::summarise( + total = dplyr::n(), + n_missing = sum(is.na(val)), + percent_missing = paste0(round(mean(is.na(val)) * 100, 2), ' %') + ) + + if (quiet == FALSE) { + # Warning for missing census data (emitted as a message, only when quiet is FALSE) + if (sum(missingYN$n_missing) > 0) { + message('Warning: Missing census data') + } + } + + # Format output (one row per larger geography: GEOID, name column(s), V) + if (geo_large == 'state') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, state, V) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, state, V) %>% + .[.$GEOID != 'NANA',] + } + if (geo_large == 'county') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, state, county, V) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, state, county, V) %>% + .[.$GEOID != 'NANA',] + } + if (geo_large == 'tract') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, state, county, tract, V) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, state, county, tract, V) %>% + .[.$GEOID != 'NANA',] + } + if (geo_large == 'cbsa') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, cbsa, V) %>% + unique(.)
%>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, cbsa, V) %>% + .[.$GEOID != 'NANA', ] %>% + dplyr::distinct(GEOID, .keep_all = TRUE) %>% + dplyr::filter(stats::complete.cases(.)) + } + if (geo_large == 'csa') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, csa, V) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, csa, V) %>% + .[.$GEOID != 'NANA', ] %>% + dplyr::distinct(GEOID, .keep_all = TRUE) %>% + dplyr::filter(stats::complete.cases(.)) + } + if (geo_large == 'metro') { + out <- out_dat %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, metro, V) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, metro, V) %>% + .[.$GEOID != 'NANA', ] %>% + dplyr::distinct(GEOID, .keep_all = TRUE) %>% + dplyr::filter(stats::complete.cases(.)) + } + + out <- out %>% + dplyr::arrange(GEOID) %>% + dplyr::as_tibble() + + out_dat <- out_dat %>% + dplyr::arrange(GEOID) %>% + dplyr::as_tibble() + + out <- list(v = out, v_data = out_dat, missing = missingYN) + + return(out) +} diff --git a/R/white_blau.R b/R/white_blau.R new file mode 100644 index 0000000..b13e134 --- /dev/null +++ b/R/white_blau.R @@ -0,0 +1,435 @@ +#' An index of spatial proximity based on White (1986) and Blau (1977) +#' +#' Compute an index of spatial proximity (White) of a selected racial/ethnic subgroup(s) and U.S. geographies. +#' +#' @param geo_large Character string specifying the larger geographical unit of the data. The default is counties \code{geo_large = 'county'}. +#' @param geo_small Character string specifying the smaller geographical unit of the data. The default is census tracts \code{geo_small = 'tract'}. +#' @param year Numeric. The year to compute the estimate. The default is 2020, and the years 2009 onward are currently available. +#' @param subgroup Character string specifying the racial/ethnic subgroup(s) as the comparison population.
See Details for available choices. +#' @param subgroup_ref Character string specifying the racial/ethnic subgroup(s) as the reference population. See Details for available choices. +#' @param omit_NAs Logical. If FALSE, will compute index for a larger geographical unit only if all of its smaller geographical units have values. The default is TRUE. +#' @param quiet Logical. If TRUE, will not display messages about potential missing census information. The default is FALSE. +#' @param ... Arguments passed to \code{\link[tidycensus]{get_acs}} to select state, county, and other arguments for census characteristics. +#' +#' @details This function will compute an index of spatial proximity (\emph{SP}) of selected racial/ethnic subgroups and U.S. geographies for a specified geographical extent (e.g., the entire U.S. or a single state) based on White (1986) \doi{10.2307/3644339} and Blau (1977; ISBN-13:978-0-029-03660-0). This function provides the computation of \emph{SP} for any of the U.S. Census Bureau race/ethnicity subgroups (including Hispanic and non-Hispanic individuals). +#' +#' The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the computation. The yearly estimates are available for 2009 onward when ACS-5 data are available (2010 onward for \code{geo_large = 'cbsa'} and 2011 onward for \code{geo_large = 'csa'} or \code{geo_large = 'metro'}) but may be available from other U.S. Census Bureau surveys. The twenty racial/ethnic subgroups (U.S.
Census Bureau definitions) are: +#' \itemize{ +#' \item \strong{B03002_002}: not Hispanic or Latino \code{'NHoL'} +#' \item \strong{B03002_003}: not Hispanic or Latino, white alone \code{'NHoLW'} +#' \item \strong{B03002_004}: not Hispanic or Latino, Black or African American alone \code{'NHoLB'} +#' \item \strong{B03002_005}: not Hispanic or Latino, American Indian and Alaska Native alone \code{'NHoLAIAN'} +#' \item \strong{B03002_006}: not Hispanic or Latino, Asian alone \code{'NHoLA'} +#' \item \strong{B03002_007}: not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'NHoLNHOPI'} +#' \item \strong{B03002_008}: not Hispanic or Latino, Some other race alone \code{'NHoLSOR'} +#' \item \strong{B03002_009}: not Hispanic or Latino, Two or more races \code{'NHoLTOMR'} +#' \item \strong{B03002_010}: not Hispanic or Latino, Two races including Some other race \code{'NHoLTRiSOR'} +#' \item \strong{B03002_011}: not Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'NHoLTReSOR'} +#' \item \strong{B03002_012}: Hispanic or Latino \code{'HoL'} +#' \item \strong{B03002_013}: Hispanic or Latino, white alone \code{'HoLW'} +#' \item \strong{B03002_014}: Hispanic or Latino, Black or African American alone \code{'HoLB'} +#' \item \strong{B03002_015}: Hispanic or Latino, American Indian and Alaska Native alone \code{'HoLAIAN'} +#' \item \strong{B03002_016}: Hispanic or Latino, Asian alone \code{'HoLA'} +#' \item \strong{B03002_017}: Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'HoLNHOPI'} +#' \item \strong{B03002_018}: Hispanic or Latino, Some other race alone \code{'HoLSOR'} +#' \item \strong{B03002_019}: Hispanic or Latino, Two or more races \code{'HoLTOMR'} +#' \item \strong{B03002_020}: Hispanic or Latino, Two races including Some other race \code{'HoLTRiSOR'} +#' \item \strong{B03002_021}: Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'HoLTReSOR'} 
+#' } +#' +#' Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. +#' +#' \emph{SP} is a measure of clustering of racial/ethnic populations within smaller geographical areas that are located within larger geographical areas. \emph{SP} can range in value from 0 to Inf and represents the degree to which an area is a racial or ethnic enclave. A value of 1 indicates there is no differential clustering between subgroup and referent group members. A value greater than 1 indicates subgroup members live nearer to one another than to referent subgroup members. A value less than 1 indicates subgroup members live nearer to referent subgroup members than to their own subgroup members. +#' +#' The metric uses the exponential transform of a distance matrix (kilometers) between smaller geographical area centroids, with a diagonal defined as \code{(0.6*a_{i})^{0.5}} where \code{a_{i}} is the area (square kilometers) of smaller geographical unit \code{i} as defined by White (1983) \doi{10.1086/227768}. +#' +#' Larger geographies available include state \code{geo_large = 'state'}, county \code{geo_large = 'county'}, census tract \code{geo_large = 'tract'}, Core Based Statistical Area \code{geo_large = 'cbsa'}, Combined Statistical Area \code{geo_large = 'csa'}, and Metropolitan Division \code{geo_large = 'metro'} levels. Smaller geographies available include county \code{geo_small = 'county'}, census tract \code{geo_small = 'tract'}, and census block group \code{geo_small = 'block group'} levels. If a larger geographical area is comprised of only one smaller geographical area (e.g., a U.S. county contains only one census tract), then the \emph{SP} value returned is NA.
If the larger geographical unit is Combined Statistical Areas \code{geo_large = 'csa'} or Core Based Statistical Areas \code{geo_large = 'cbsa'}, only the smaller geographical units completely within a larger geographical unit are considered in the \emph{SP} computation (see internal \code{\link[sf]{st_within}} function for more information). We recommend specifying all states within which the larger geographical unit of interest is located using the internal \code{state} argument to ensure all appropriate smaller geographical units are included in the \emph{SP} computation. +#' +#' @return An object of class 'list'. This is a named list with the following components: +#' +#' \describe{ +#' \item{\code{sp}}{An object of class 'tbl' for the GEOID, name, and \emph{SP} at specified larger census geographies.} +#' \item{\code{sp_data}}{An object of class 'tbl' for the raw census values at specified smaller census geographies.} +#' \item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute \emph{SP}.} +#' } +#' +#' @import dplyr +#' @importFrom sf st_centroid st_distance st_drop_geometry st_within +#' @importFrom stats complete.cases +#' @importFrom tidycensus get_acs +#' @importFrom tidyr pivot_longer separate +#' @importFrom tigris combined_statistical_areas core_based_statistical_areas metro_divisions +#' @importFrom units drop_units set_units +#' @importFrom utils stack +#' @export +#' +#' @seealso \code{\link[tidycensus]{get_acs}} for additional arguments for geographic extent selection (i.e., \code{state} and \code{county}). +#' +#' @examples +#' \dontrun{ +#' # Wrapped in \dontrun{} because these examples require a Census API key. +#' +#' # Index of spatial proximity of non-Hispanic Black vs.
non-Hispanic white populations +#' ## in census tracts within counties of Georgia, U.S.A. (2020) +#' white_blau( +#' geo_large = 'county', +#' geo_small = 'tract', +#' state = 'GA', +#' year = 2020, +#' subgroup = 'NHoLB', +#' subgroup_ref = 'NHoLW' +#' ) +#' +#' } +#' +white_blau <- function(geo_large = 'county', + geo_small = 'tract', + year = 2020, + subgroup, + subgroup_ref, + omit_NAs = TRUE, + quiet = FALSE, + ...) { + + # Check arguments (match.arg() and stopifnot() raise an error on invalid input; their return values are not used) + match.arg(geo_large, choices = c('state', 'county', 'tract', 'cbsa', 'csa', 'metro')) + match.arg(geo_small, choices = c('county', 'tract', 'block group')) + stopifnot(is.numeric(year), year >= 2009) # all variables available 2009 onward + match.arg( + subgroup, + several.ok = TRUE, + choices = c( + 'NHoL', + 'NHoLW', + 'NHoLB', + 'NHoLAIAN', + 'NHoLA', + 'NHoLNHOPI', + 'NHoLSOR', + 'NHoLTOMR', + 'NHoLTRiSOR', + 'NHoLTReSOR', + 'HoL', + 'HoLW', + 'HoLB', + 'HoLAIAN', + 'HoLA', + 'HoLNHOPI', + 'HoLSOR', + 'HoLTOMR', + 'HoLTRiSOR', + 'HoLTReSOR' + ) + ) + match.arg( + subgroup_ref, + several.ok = TRUE, + choices = c( + 'NHoL', + 'NHoLW', + 'NHoLB', + 'NHoLAIAN', + 'NHoLA', + 'NHoLNHOPI', + 'NHoLSOR', + 'NHoLTOMR', + 'NHoLTRiSOR', + 'NHoLTReSOR', + 'HoL', + 'HoLW', + 'HoLB', + 'HoLAIAN', + 'HoLA', + 'HoLNHOPI', + 'HoLSOR', + 'HoLTOMR', + 'HoLTRiSOR', + 'HoLTReSOR' + ) + ) + + # Select census variables + vars <- c( + TotalPop = 'B03002_001', + NHoL = 'B03002_002', + NHoLW = 'B03002_003', + NHoLB = 'B03002_004', + NHoLAIAN = 'B03002_005', + NHoLA = 'B03002_006', + NHoLNHOPI = 'B03002_007', + NHoLSOR = 'B03002_008', + NHoLTOMR = 'B03002_009', + NHoLTRiSOR = 'B03002_010', + NHoLTReSOR = 'B03002_011', + HoL = 'B03002_012', + HoLW = 'B03002_013', + HoLB = 'B03002_014', + HoLAIAN = 'B03002_015', + HoLA = 'B03002_016', + HoLNHOPI = 'B03002_017', + HoLSOR = 'B03002_018', + HoLTOMR = 'B03002_019', + HoLTRiSOR = 'B03002_020', + HoLTReSOR = 'B03002_021' + ) + + selected_vars <- vars[c('TotalPop', subgroup, subgroup_ref)] +
out_names <- names(selected_vars) # save for output + in_subgroup <- paste0(subgroup, 'E') + in_subgroup_ref <- paste0(subgroup_ref, 'E') + + # Acquire SP variables and sf geometries + out_dat <- suppressMessages(suppressWarnings( + tidycensus::get_acs( + geography = geo_small, + year = year, + output = 'wide', + variables = selected_vars, + geometry = TRUE, + keep_geo_vars = TRUE, + ... + ) + )) + + # Format output (split NAME.y into one name column per geography level) + if (geo_small == 'county') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('county', 'state'), sep = ',') + } + if (geo_small == 'tract') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('tract', 'county', 'state'), sep = ',') %>% + dplyr::mutate(tract = gsub('[^0-9\\.]', '', tract)) + } + if (geo_small == 'block group') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('block.group', 'tract', 'county', 'state'), sep = ',') %>% + dplyr::mutate( + tract = gsub('[^0-9\\.]', '', tract), + block.group = gsub('[^0-9\\.]', '', block.group) + ) + } + + # Grouping IDs (oid) for SP computation; geometry is retained here and dropped only when formatting the output + if (geo_large == 'state') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = STATEFP, + state = stringr::str_trim(state) + ) + } + if (geo_large == 'tract') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = paste0(STATEFP, COUNTYFP, TRACTCE), + state = stringr::str_trim(state), + county = stringr::str_trim(county) + ) + } + if (geo_large == 'county') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = paste0(STATEFP, COUNTYFP), + state = stringr::str_trim(state), + county = stringr::str_trim(county) + ) + } + if (geo_large == 'cbsa') { + stopifnot(is.numeric(year), year >= 2010) # CBSAs only available 2010 onward; NOTE(review): oid and cbsa below are read from lgeom by column position (3 and 4), presumably GEOID and NAME -- confirm against the tigris output + lgeom <- suppressMessages(suppressWarnings(tigris::core_based_statistical_areas(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 3] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x })
+ }) %>% + unlist(), + cbsa = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 4] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) + } + if (geo_large == 'csa') { + stopifnot(is.numeric(year), year >= 2011) # CSAs only available 2011 onward + lgeom <- suppressMessages(suppressWarnings(tigris::combined_statistical_areas(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 2] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + csa = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 3] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) + } + if (geo_large == 'metro') { + stopifnot(is.numeric(year), year >= 2011) # Metro Divisions only available 2011 onward + lgeom <- suppressMessages(suppressWarnings(tigris::metro_divisions(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 4] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + metro = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 5] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) + } + + # Count of racial/ethnic subgroup populations + ## Count of racial/ethnic comparison subgroup population (row-wise sum when several subgroups are selected) + if (length(in_subgroup) == 1) { + out_dat <- out_dat %>% + dplyr::mutate(subgroup = as.data.frame(.)[, in_subgroup]) + } else { + out_dat <- out_dat %>% + dplyr::mutate(subgroup = rowSums(as.data.frame(.)[, in_subgroup])) + } + ## Count of racial/ethnic reference subgroup population (row-wise sum when several subgroups are selected) + if (length(in_subgroup_ref) == 1) { + out_dat <- out_dat %>% + dplyr::mutate(subgroup_ref = as.data.frame(.)[, in_subgroup_ref]) + } else { + out_dat
<- out_dat %>% + dplyr::mutate(subgroup_ref = rowSums(as.data.frame(.)[, in_subgroup_ref])) + } + + # Compute SP + ## From White (1986) https://doi.org/10.2307/3644339 + ## D_{jt} = 1/2 \sum_{i=1}^{k} | \frac{x_{ijt}}{X_{jt}}-\frac{y_{ijt}}{Y_{jt}}| -- NOTE(review): this and the definitions below describe the Dissimilarity Index (D), not SP; SP itself is computed by sp_fun (defined elsewhere) -- confirm and replace + ## Where for k smaller geographies: + ## D_{jt} denotes the DI of larger geography j at time t + ## x_{ijt} denotes the racial/ethnic subgroup population of smaller geography i within larger geography j at time t + ## X_{jt} denotes the racial/ethnic subgroup population of larger geography j at time t + ## y_{ijt} denotes the racial/ethnic referent subgroup population of smaller geography i within larger geography j at time t + ## Y_{jt} denotes the racial/ethnic referent subgroup population of larger geography j at time t + + ## Compute (split by oid, apply sp_fun to each larger geography, stack into an SP/oid table) + out_tmp <- out_dat %>% + split(., f = list(out_dat$oid)) %>% + lapply(., FUN = sp_fun, omit_NAs = omit_NAs) %>% + utils::stack(.) %>% + dplyr::mutate( + SP = values, + oid = ind + ) %>% + dplyr::select(SP, oid) %>% + sf::st_drop_geometry() + + # Tabulate missingness of census characteristics (count and percent per variable) + missingYN <- out_dat[, c('TotalPopE', in_subgroup, in_subgroup_ref)] %>% + sf::st_drop_geometry() + names(missingYN) <- out_names + missingYN <- missingYN %>% + tidyr::pivot_longer( + cols = dplyr::everything(), + names_to = 'variable', + values_to = 'val' + ) %>% + dplyr::group_by(variable) %>% + dplyr::summarise( + total = dplyr::n(), + n_missing = sum(is.na(val)), + percent_missing = paste0(round(mean(is.na(val)) * 100, 2), ' %') + ) + + if (quiet == FALSE) { + # Warning for missing census data (emitted as a message, only when quiet is FALSE) + if (sum(missingYN$n_missing) > 0) { + message('Warning: Missing census data') + } + } + + # Format output (one row per larger geography: GEOID, name column(s), SP) + if (geo_large == 'state') { + out <- out_dat %>% + sf::st_drop_geometry() %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, state, SP) %>% + unique(.)
%>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, state, SP) %>% + .[.$GEOID != 'NANA',] + } + if (geo_large == 'county') { + out <- out_dat %>% + sf::st_drop_geometry() %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, state, county, SP) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, state, county, SP) %>% + .[.$GEOID != 'NANA',] + } + if (geo_large == 'tract') { + out <- out_dat %>% + sf::st_drop_geometry() %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, state, county, tract, SP) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, state, county, tract, SP) %>% + .[.$GEOID != 'NANA',] + } + if (geo_large == 'cbsa') { + out <- out_dat %>% + sf::st_drop_geometry() %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, cbsa, SP) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, cbsa, SP) %>% + .[.$GEOID != 'NANA', ] %>% + dplyr::distinct(GEOID, .keep_all = TRUE) %>% + dplyr::filter(stats::complete.cases(.)) + } + if (geo_large == 'csa') { + out <- out_dat %>% + sf::st_drop_geometry() %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, csa, SP) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, csa, SP) %>% + .[.$GEOID != 'NANA', ] %>% + dplyr::distinct(GEOID, .keep_all = TRUE) %>% + dplyr::filter(stats::complete.cases(.)) + } + if (geo_large == 'metro') { + out <- out_dat %>% + sf::st_drop_geometry() %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>% + dplyr::select(oid, metro, SP) %>% + unique(.)
%>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, metro, SP) %>% + .[.$GEOID != 'NANA', ] %>% + dplyr::distinct(GEOID, .keep_all = TRUE) %>% + dplyr::filter(stats::complete.cases(.)) + } + + out <- out %>% + dplyr::arrange(GEOID) %>% + dplyr::as_tibble() + + out_dat <- out_dat %>% + dplyr::arrange(GEOID) %>% + dplyr::as_tibble() + + out <- list(sp = out, sp_data = out_dat, missing = missingYN) + + return(out) +} diff --git a/R/zzz.R b/R/zzz.R index e8d7e4a..1579f24 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,3 +1,3 @@ .onAttach <- function(...) { - packageStartupMessage(paste("\nWelcome to {ndi} version ", utils::packageDescription("ndi")$Version, "\n> help(\"ndi\") # for documentation\n> citation(\"ndi\") # for how to cite\n", sep = ""), appendLF = TRUE) + packageStartupMessage(paste('\nWelcome to {ndi} version ', utils::packageDescription('ndi')$Version, '\n> help(\'ndi\') # for documentation\n> citation(\'ndi\') # for how to cite\n', sep = ''), appendLF = TRUE) } diff --git a/README.md b/README.md index 70032f0..6cb02f9 100644 --- a/README.md +++ b/README.md @@ -1,77 +1,112 @@ -ndi: Neighborhood Deprivation Indices -=================================================== +# ndi: Neighborhood Deprivation Indices + [![R-CMD-check](https://github.com/idblr/ndi/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/idblr/ndi/actions/workflows/R-CMD-check.yaml) -[![CRAN status](http://www.r-pkg.org/badges/version/ndi)](https://cran.r-project.org/package=ndi) -[![CRAN version](https://www.r-pkg.org/badges/version-ago/ndi)](https://cran.r-project.org/package=ndi) +[![CRAN status](https://r-pkg.org/badges/version/ndi)](https://cran.r-project.org/package=ndi) +[![CRAN version](https://r-pkg.org/badges/version-ago/ndi)](https://cran.r-project.org/package=ndi) [![CRAN RStudio mirror downloads total](https://cranlogs.r-pkg.org/badges/grand-total/ndi?color=blue)](https://r-pkg.org/pkg/ndi) -[![CRAN RStudio mirror downloads monthly
](http://cranlogs.r-pkg.org/badges/ndi)](https://www.r-pkg.org:443/pkg/ndi) -[![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) +[![CRAN RStudio mirror downloads monthly ](https://cranlogs.r-pkg.org/badges/ndi)](https://r-pkg.org:443/pkg/ndi) +[![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/license/apache-2-0) ![GitHub last commit](https://img.shields.io/github/last-commit/idblr/ndi) [![DOI](https://zenodo.org/badge/521439746.svg)](https://zenodo.org/badge/latestdoi/521439746) -**Date repository last updated**: December 15, 2022 +**Date repository last updated**: 2024-08-22 ### Overview -The `ndi` package is a suite of `R` functions to compute various metrics of socio-economic deprivation and disparity in the United States. Some metrics are considered "spatial" because they consider the values of neighboring (i.e., adjacent) census geographies in their computation, while other metrics are "aspatial" because they only consider the value within each census geography. Two types of aspatial NDI are available: (1) based on [Messer et al. (2006)](https://doi.org/10.1007/s11524-006-9094-x) and (2) based on [Andrews et al. (2020)](https://doi.org/10.1080/17445647.2020.1750066) and [Slotman et al. (2022)](https://doi.org/10.1016/j.dib.2022.108002) who use variables chosen by [Roux and Mair (2010)](https://doi.org/10.1111/j.1749-6632.2009.05333.x). Both are a decomposition of various demographic characteristics from the U.S. Census Bureau American Community Survey 5-year estimates (ACS-5; 2006-2010 onward) pulled by the [tidycensus](https://CRAN.R-project.org/package=tidycensus) package. Using data from the ACS-5 (2005-2009 onward), the `ndi` package can also (1) compute the spatial Racial Isolation Index (RI) based on [Anthopolos et al. 
(2011)](https://www.doi.org/10.1016/j.sste.2011.06.002), (2) compute the spatial Educational Isolation Index (EI) based on [Bravo et al. (2021)](https://www.doi.org/10.3390/ijerph18179384), (3) compute the aspatial Index of Concentration at the Extremes (ICE) based on [Feldman et al. (2015)](https://www.doi.org/10.1136/jech-2015-205728) and [Krieger et al. (2016)](https://www.doi.org/10.2105/AJPH.2015.302955), (4) compute the aspatial racial/ethnic Dissimilarity Index (DI) based on [Duncan & Duncan (1955)](https://doi.org/10.2307/2088328), (5) compute the aspatial Atkinson Index (DI) based on [Atkinson (1970)](https://doi.org/10.1016/0022-0531(70)90039-6), and (6) retrieve the aspatial Gini Index based on [Gini (1921)](https://www.doi.org/10.2307/2223319). +The *ndi* package is a suite of [**R**](https://cran.r-project.org/) functions to compute various metrics of socio-economic deprivation and disparity in the United States. Some metrics are considered 'spatial' because they consider the values of neighboring (i.e., adjacent) census geographies in their computation, while other metrics are 'aspatial' because they only consider the value within each census geography. Two types of aspatial neighborhood deprivation index (*NDI*) are available: (1) based on [Messer et al. (2006)](https://doi.org/10.1007/s11524-006-9094-x) and (2) based on [Andrews et al. (2020)](https://doi.org/10.1080/17445647.2020.1750066) and [Slotman et al. (2022)](https://doi.org/10.1016/j.dib.2022.108002) who use variables chosen by [Roux and Mair (2010)](https://doi.org/10.1111/j.1749-6632.2009.05333.x). Both are a decomposition of various demographic characteristics from the U.S. Census Bureau American Community Survey 5-year estimates (ACS-5; 2006-2010 onward) pulled by the [tidycensus](https://CRAN.R-project.org/package=tidycensus) package. 
Using data from the ACS-5 (2005-2009 onward), the *ndi* package can also compute the (1) spatial Racial Isolation Index (*RI*) based on [Anthopolos et al. (2011)](https://doi.org/10.1016/j.sste.2011.06.002), (2) spatial Educational Isolation Index (*EI*) based on [Bravo et al. (2021)](https://doi.org/10.3390/ijerph18179384), (3) aspatial Index of Concentration at the Extremes (*ICE*) based on [Feldman et al. (2015)](https://doi.org/10.1136/jech-2015-205728) and [Krieger et al. (2016)](https://doi.org/10.2105/AJPH.2015.302955), (4) aspatial racial/ethnic Dissimilarity Index (*D*) based on [Duncan & Duncan (1955)](https://doi.org/10.2307/2088328), (5) aspatial income or racial/ethnic Atkinson Index (*A*) based on [Atkinson (1970)](https://doi.org/10.1016/0022-0531(70)90039-6), (6) aspatial racial/ethnic Interaction Index (_xPy\*_) based on Shevky & Williams (1949; ISBN-13:978-0-837-15637-8) and [Bell (1954)](https://doi.org/10.2307/2574118), (7) aspatial racial/ethnic Correlation Ratio (*V*) based on [Bell (1954)](https://doi.org/10.2307/2574118) and [White (1986)](https://doi.org/10.2307/3644339), (8) aspatial racial/ethnic Location Quotient (*LQ*) based on [Merton (1938)](https://doi.org/10.2307/2084686) and [Sudano et al. (2013)](https://doi.org/10.1016/j.healthplace.2012.09.015), (9) aspatial racial/ethnic Local Exposure and Isolation (*LEx/Is*) metric based on [Bemanian & Beyer (2017)](https://doi.org/10.1158/1055-9965.EPI-16-0926), (10) aspatial racial/ethnic Delta (*DEL*) based on [Hoover (1941)](https://doi.org/10.1017/S0022050700052980) and Duncan et al. (1961; LC:60007089), (11) index of spatial proximity (*SP*) based on [White (1986)](https://doi.org/10.2307/3644339) and Blau (1977; ISBN-13:978-0-029-03660-0), and (12) aspatial racial/ethnic Isolation Index (_xPx\*_) based on Lieberson (1981; ISBN-13:978-1-032-53884-6) and [Bell (1954)](https://doi.org/10.2307/2574118). 
Also using data from the ACS-5 (2005-2009 onward), the *ndi* package can retrieve the aspatial Gini Index (*G*) based on [Gini (1921)](https://doi.org/10.2307/2223319). ### Installation To install the release version from CRAN: - install.packages("ndi") + install.packages('ndi') To install the development version from GitHub: - devtools::install_github("idblr/ndi") + devtools::install_github('idblr/ndi') ### Available functions --++ - + - - + + + - - + + + - - + + + - - + + + - - + + + - - + + + - - + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -
Function Description
anthopolosCompute the spatial Racial Isolation Index (RI) based on Anthopolos et al. (2011)
anthopolosCompute the spatial Racial Isolation Index (RI) based on Anthopolos et al. (2011)
atkinsonCompute the aspatial Atkinson Index (AI) based on Atkinson (1970)
atkinsonCompute the aspatial Atkinson Index (A) based on Atkinson (1970)
bravoCompute the spatial Educational Isolation Index (EI) based on Bravo et al. (2021)
bellCompute the aspatial racial/ethnic Interaction Index (xPy*) based on Shevky & Williams (1949; ISBN-13:978-0-837-15637-8) and Bell (1954)
duncanCompute the aspatial racial/ethnic Dissimilarity Index (DI) based on Duncan & Duncan (1955)
bemanian_beyerCompute the aspatial racial/ethnic Local Exposure and Isolation (LEx/Is) metric based on Bemanian & Beyer (2017)
giniRetrieve the aspatial Gini Index based on Gini (1921)
bravoCompute the spatial Educational Isolation Index (EI) based on Bravo et al. (2021)
kriegerCompute the aspatial Index of Concentration at the Extremes (ICE) based on Feldman et al. (2015) and Krieger et al. (2016)
duncanCompute the aspatial racial/ethnic Dissimilarity Index (D) based on Duncan & Duncan (1955)
messerCompute the aspatial Neighboorhood Deprivation Index (NDI) based on Messer et al. (2006)giniRetrieve the aspatial Gini Index (G) based on Gini (1921)
powell_wileyCompute the aspatial Neighboorhood Deprivation Index (NDI) based on Andrews et al. (2020) and Slotman et al. (2022) with variables chosen by Roux and Mair (2010)
hooverCompute the aspatial racial/ethnic Delta (DEL) based on Hoover (1941) and Duncan et al. (1961; LC:60007089)
kriegerCompute the aspatial Index of Concentration at the Extremes (ICE) based on Feldman et al. (2015) and Krieger et al. (2016)
liebersonCompute the aspatial racial/ethnic Isolation Index (xPx*) based on Lieberson (1981; ISBN-13:978-1-032-53884-6) and Bell (1954)
messerCompute the aspatial Neighborhood Deprivation Index (NDI) based on Messer et al. (2006)
powell_wileyCompute the aspatial Neighborhood Deprivation Index (NDI) based on Andrews et al. (2020) and Slotman et al. (2022) with variables chosen by Roux and Mair (2010)
sudanoCompute the aspatial racial/ethnic Location Quotient (LQ) based on Merton (1938) and Sudano et al. (2013)
whiteCompute the aspatial racial/ethnic Correlation Ratio (V) based on Bell (1954) and White (1986)
white_blauCompute an index of spatial proximity (SP) based on White (1986) and Blau (1977; ISBN-13:978-0-029-03660-0)
+
The repository also includes the code to create the project hexagon sticker. -

+

### Available sample dataset @@ -79,44 +114,49 @@ The repository also includes the code to create the project hexagon sticker. --++ - + - - + + + -
Data Description
DCtracts2020A sample dataset containing information about U.S. Census American Community Survey 5-year estimate data for the District of Columbia census tracts (2020). The data are obtained from the tidycensus package and formatted for the messer() and powell_wiley() functions input.
DCtracts2020A sample data set containing information about U.S. Census American Community Survey 5-year estimate data for the District of Columbia census tracts (2020). The data are obtained from the tidycensus package and formatted for the messer() and powell_wiley() functions input.
+
### Author -* **Ian D. Buller** - *Social & Scientific Systems, Inc., a division of DLH Corporation, Silver Spring, Maryland (current)* - *Occupational and Environmental Epidemiology Branch, Division of Cancer Epidemiology and Genetics, National Cancer Institute, National Institutes of Health, Rockville, Maryland (original)* - [GitHub](https://github.com/idblr) - [ORCID](https://orcid.org/0000-0001-9477-8582) +* **Ian D. Buller** - *DLH, LLC (formerly Social & Scientific Systems, Inc.), Bethesda, Maryland (current)* - *Occupational and Environmental Epidemiology Branch, Division of Cancer Epidemiology and Genetics, National Cancer Institute, National Institutes of Health, Rockville, Maryland (original)* - [GitHub](https://github.com/idblr) - [ORCID](https://orcid.org/0000-0001-9477-8582) See also the list of [contributors](https://github.com/idblr/ndi/graphs/contributors) who participated in this package, including: * **Jacob Englert** - *Biostatistics and Bioinformatics Doctoral Program, Laney Graduate School, Emory University, Atlanta, Georgia* - [GitHub](https://github.com/jacobenglert) +* **Jessica Gleason** - *Epidemiology Branch, Division of Population Health Research, Eunice Kennedy Shriver National Institute of Child Health and Human Development, National Institutes of Health, Bethesda, Maryland* - [ORCID](https://orcid.org/0000-0001-9877-7931) + * **Chris Prener** - *Real World Evidence Center of Excellence, Pfizer, Inc.* - [GitHub](https://github.com/chris-prener) - [ORCID](https://orcid.org/0000-0002-4310-9888) -* **Jessica Gleason** - *Epidemiology Branch, Division of Population Health Research, Eunice Kennedy Shriver National Institute of Child Health and Human Development, National Institutes of Health, Bethesda, Maryland* - [ORCID](https://orcid.org/0000-0001-9877-7931) +* **Davis Vaughan** - *Posit* - [GitHub](https://github.com/DavisVaughan) - [ORCID](https://orcid.org/0000-0003-4777-038X) Thank you to those who suggested additional metrics, 
including: -* **Jessica Madrigal** - *Occupational and Environmental Epidemiology Branch, Division of Cancer Epidemiology and Genetics, National Cancer Institute, National Institutes of Health, Rockville, Maryland* - [ORCID](https://orcid.org/0000-0001-5303-5109) - * **David Berrigan** - *Behavioral Research Program, Division of Cancer Control and Population Sciences, National Cancer Institute, National Institutes of Health, Rockville, Maryland* - [ORCID](https://orcid.org/0000-0002-5333-179X) +* **Symielle Gaston** - *Social and Environmental Determinants of Health Equity Group, Epidemiology Branch, National Institute of Environmental Health Sciences, National Institutes of Health, Research Triangle Park, North Carolina* - [ORCID](https://orcid.org/0000-0001-9495-1592) + +* **Jessica Madrigal** - *Occupational and Environmental Epidemiology Branch, Division of Cancer Epidemiology and Genetics, National Cancer Institute, National Institutes of Health, Rockville, Maryland* - [ORCID](https://orcid.org/0000-0001-5303-5109) + ### Getting Started * Step 1: Obtain a unique access key from the U.S. Census Bureau. Follow [this link](http://api.census.gov/data/key_signup.html) to obtain one. -* Step 2: Specify your access key in the `anthopolos()`, `atkinson()`, `bravo()`, `duncan()`, `gini()`, `krieger()`, `messer()`, or `powell_wiley()` functions using the internal `key` argument or by using the `census_api_key()` function from the `tidycensus` package before running the `anthopolos()`, `atkinson()`, `bravo()`, `duncan()`, `gini()`, `krieger()`, `messer()`, or `powell_wiley()` functions (see an example below). 
+* Step 2: Specify your access key in the [`anthopolos()`](R/anthopolos.R), [`atkinson()`](R/atkinson.R), [`bell()`](R/bell.R), [`bemanian_beyer()`](R/bemanian_beyer.R), [`bravo()`](R/bravo.R), [`duncan()`](R/duncan.R), [`gini()`](R/gini.R), [`hoover()`](R/hoover.R), [`krieger()`](R/krieger.R), [`lieberson()`](R/lieberson.R), [`messer()`](R/messer.R), [`powell_wiley()`](R/powell_wiley.R), [`sudano()`](R/sudano.R), or [`white()`](R/white.R) functions using the internal `key` argument or by using the `census_api_key()` function from the [*tidycensus*](https://cran.r-project.org/package=tidycensus) package before running the [`anthopolos()`](R/anthopolos.R), [`atkinson()`](R/atkinson.R), [`bell()`](R/bell.R), [`bemanian_beyer()`](R/bemanian_beyer.R), [`bravo()`](R/bravo.R), [`duncan()`](R/duncan.R), [`gini()`](R/gini.R), [`hoover()`](R/hoover.R), [`krieger()`](R/krieger.R), [`lieberson()`](R/lieberson.R), [`messer()`](R/messer.R), [`powell_wiley()`](R/powell_wiley.R), [`sudano()`](R/sudano.R), or [`white()`](R/white.R) functions (see an example below). ### Usage @@ -126,9 +166,10 @@ Thank you to those who suggested additional metrics, including: # ------------------ # library(ndi) +library(dplyr) library(ggplot2) -library(sf) # dependency fo the "ndi" package -library(tidycensus) # a dependency for the "ndi" package +library(sf) # a dependency for the 'ndi' package +library(tidycensus) # a dependency for the 'ndi' package library(tigris) # -------- # @@ -137,70 +178,92 @@ library(tigris) ## Access Key for census data download ### Obtain one at http://api.census.gov/data/key_signup.html -tidycensus::census_api_key("...") # INSERT YOUR OWN KEY FROM U.S. CENSUS API +census_api_key('...') # INSERT YOUR OWN KEY FROM U.S. CENSUS API # ---------------------- # # Calculate NDI (Messer) # # ---------------------- # # Compute the NDI (Messer) values (2016-2020 5-year ACS) for Washington, D.C. 
census tracts -messer2020DC <- ndi::messer(state = "DC", year = 2020) +messer_2020_DC <- messer(state = 'DC', year = 2020) # ------------------------------ # # Outputs from messer() function # # ------------------------------ # -# A tibble containing the identification, geographic name, NDI (Messer) values, NDI (Messer) quartiles, and raw census characteristics for each tract -messer2020DC$ndi +# A tibble containing the identification, geographic name, NDI (Messer) values, NDI (Messer) +# quartiles, and raw census characteristics for each tract +messer_2020_DC$ndi # The results from the principal component analysis used to compute the NDI (Messer) values -messer2020DC$pca +messer_2020_DC$pca -# A tibble containing a breakdown of the missingingness of the census characteristics used to compute the NDI (Messer) values -messer2020DC$missing +# A tibble containing a breakdown of the missingness of the census characteristics +# used to compute the NDI (Messer) values +messer_2020_DC$missing # -------------------------------------- # # Visualize the messer() function output # # -------------------------------------- # -# Obtain the 2020 census tracts from the "tigris" package -tract2020DC <- tigris::tracts(state = "DC", year = 2020, cb = TRUE) +# Obtain the 2020 census tracts from the 'tigris' package +tract_2020_DC <- tracts(state = 'DC', year = 2020, cb = TRUE) # Join the NDI (Messer) values to the census tract geometry -DC2020messer <- dplyr::left_join(tract2020DC, messer2020DC$ndi, by = "GEOID") +DC_2020_messer <- tract_2020_DC %>% + left_join(messer_2020_DC$ndi, by = 'GEOID') # Visualize the NDI (Messer) values (2016-2020 5-year ACS) for Washington, D.C. census tracts ## Continuous Index -ggplot2::ggplot() + - ggplot2::geom_sf(data = DC2020messer, - ggplot2::aes(fill = NDI), - color = "white") + - ggplot2::theme_bw() + - ggplot2::scale_fill_viridis_c() + - ggplot2::labs(fill = "Index (Continuous)", - caption = "Source: U.S. 
Census ACS 2016-2020 estimates")+ - ggplot2::ggtitle("Neighborhood Deprivation Index\nContinuous (Messer, non-imputed)", - subtitle = "Washington, D.C. tracts as the referent") +ggplot() + + geom_sf( + data = DC_2020_messer, + aes(fill = NDI), + color = 'white' + ) + + theme_bw() + + scale_fill_viridis_c() + + labs( + fill = 'Index (Continuous)', + caption = 'Source: U.S. Census ACS 2016-2020 estimates' + ) + + ggtitle( + 'Neighborhood Deprivation Index\nContinuous (Messer, non-imputed)', + subtitle = 'Washington, D.C. tracts as the referent' + ) ## Categorical Index (Quartiles) -### Rename "9-NDI not avail" level as NA for plotting -DC2020messer$NDIQuartNA <- factor(replace(as.character(DC2020messer$NDIQuart), - DC2020messer$NDIQuart == "9-NDI not avail", - NA), - c(levels(DC2020messer$NDIQuart)[-5], NA)) - -ggplot2::ggplot() + - ggplot2::geom_sf(data = DC2020messer, - ggplot2::aes(fill = NDIQuartNA), - color = "white") + - ggplot2::theme_bw() + - ggplot2::scale_fill_viridis_d(guide = ggplot2::guide_legend(reverse = TRUE), - na.value = "grey50") + - ggplot2::labs(fill = "Index (Categorical)", - caption = "Source: U.S. Census ACS 2016-2020 estimates") + - ggplot2::ggtitle("Neighborhood Deprivation Index\nQuartiles (Messer, non-imputed)", - subtitle = "Washington, D.C. tracts as the referent") +### Rename '9-NDI not avail' level as NA for plotting +DC_2020_messer$NDIQuartNA <- + factor( + replace( + as.character(DC_2020_messer$NDIQuart), + DC_2020_messer$NDIQuart == '9-NDI not avail', + NA + ), + c(levels(DC_2020_messer$NDIQuart)[-5], NA) + ) + +ggplot() + + geom_sf( + data = DC_2020_messer, + aes(fill = NDIQuartNA), + color = 'white' + ) + + theme_bw() + + scale_fill_viridis_d( + guide = guide_legend(reverse = TRUE), + na.value = 'grey50' + ) + + labs( + fill = 'Index (Categorical)', + caption = 'Source: U.S. Census ACS 2016-2020 estimates' + ) + + ggtitle( + 'Neighborhood Deprivation Index\nQuartiles (Messer, non-imputed)', + subtitle = 'Washington, D.C. 
tracts as the referent' + ) ``` ![](man/figures/messer1.png) ![](man/figures/messer2.png) @@ -210,66 +273,93 @@ ggplot2::ggplot() + # Calculate NDI (Powell-Wiley) # # ---------------------------- # -# Compute the NDI (Powell-Wiley) values (2016-2020 5-year ACS) for Washington, D.C. census tracts -powell_wiley2020DC <- powell_wiley(state = "DC", year = 2020) -powell_wiley2020DCi <- powell_wiley(state = "DC", year = 2020, imp = TRUE) # impute missing values +# Compute the NDI (Powell-Wiley) values (2016-2020 5-year ACS) for +# Washington, D.C. census tracts +powell_wiley_2020_DC <- powell_wiley(state = 'DC', year = 2020) +# impute missing values +powell_wiley_2020_DCi <- powell_wiley(state = 'DC', year = 2020, imp = TRUE) # ------------------------------------ # # Outputs from powell_wiley() function # # ------------------------------------ # -# A tibble containing the identification, geographic name, NDI (Powell-Wiley) value, and raw census characteristics for each tract -powell_wiley2020DC$ndi +# A tibble containing the identification, geographic name, NDI (Powell-Wiley) value, and +# raw census characteristics for each tract +powell_wiley_2020_DC$ndi -# The results from the principal component analysis used to compute the NDI (Powell-Wiley) values -powell_wiley2020DC$pca +# The results from the principal component analysis used to +# compute the NDI (Powell-Wiley) values +powell_wiley_2020_DC$pca -# A tibble containing a breakdown of the missingingness of the census characteristics used to compute the NDI (Powell-Wiley) values -powell_wiley2020DC$missing +# A tibble containing a breakdown of the missingness of the census characteristics used to +# compute the NDI (Powell-Wiley) values +powell_wiley_2020_DC$missing # -------------------------------------------- # # Visualize the powell_wiley() function output # # -------------------------------------------- # -# Obtain the 2020 census tracts from the "tigris" package -tract2020DC <- tigris::tracts(state = "DC", 
year = 2020, cb = TRUE) +# Obtain the 2020 census tracts from the 'tigris' package +tract_2020_DC <- tracts(state = 'DC', year = 2020, cb = TRUE) # Join the NDI (powell_wiley) values to the census tract geometry -DC2020powell_wiley <- dplyr::left_join(tract2020DC, powell_wiley2020DC$ndi, by = "GEOID") -DC2020powell_wiley <- dplyr::left_join(DC2020powell_wiley, powell_wiley2020DCi$ndi, by = "GEOID") +DC_2020_powell_wiley <- tract_2020_DC %>% + left_join(powell_wiley_2020_DC$ndi, by = 'GEOID') +DC_2020_powell_wiley <- DC_2020_powell_wiley %>% + left_join(powell_wiley_2020_DCi$ndi, by = 'GEOID') -# Visualize the NDI (Powell-Wiley) values (2016-2020 5-year ACS) for Washington, D.C. census tracts +# Visualize the NDI (Powell-Wiley) values (2016-2020 5-year ACS) for +# Washington, D.C. census tracts ## Non-imputed missing tracts (Continuous) -ggplot2::ggplot() + - ggplot2::geom_sf(data = DC2020powell_wiley, - ggplot2::aes(fill = NDI.x), - color = "white") + - ggplot2::theme_bw() + - ggplot2::scale_fill_viridis_c() + - ggplot2::labs(fill = "Index (Continuous)", - caption = "Source: U.S. Census ACS 2016-2020 estimates")+ - ggplot2::ggtitle("Neighborhood Deprivation Index\nContinuous (Powell-Wiley, non-imputed)", - subtitle = "Washington, D.C. tracts as the referent") +ggplot() + + geom_sf( + data = DC_2020_powell_wiley, + aes(fill = NDI.x), + color = 'white' + ) + + theme_bw() + + scale_fill_viridis_c() + + labs( + fill = 'Index (Continuous)', + caption = 'Source: U.S. Census ACS 2016-2020 estimates' + ) + + ggtitle( + 'Neighborhood Deprivation Index\nContinuous (Powell-Wiley, non-imputed)', + subtitle = 'Washington, D.C. 
tracts as the referent' + ) ## Non-imputed missing tracts (Categorical quintiles) -### Rename "9-NDI not avail" level as NA for plotting -DC2020powell_wiley$NDIQuintNA.x <- factor(replace(as.character(DC2020powell_wiley$NDIQuint.x), - DC2020powell_wiley$NDIQuint.x == "9-NDI not avail", - NA), - c(levels(DC2020powell_wiley$NDIQuint.x)[-6], NA)) - -ggplot2::ggplot() + - ggplot2::geom_sf(data = DC2020powell_wiley, - ggplot2::aes(fill = NDIQuintNA.x), - color = "white") + - ggplot2::theme_bw() + - ggplot2::scale_fill_viridis_d(guide = ggplot2::guide_legend(reverse = TRUE), - na.value = "grey50") + - ggplot2::labs(fill = "Index (Categorical)", - caption = "Source: U.S. Census ACS 2016-2020 estimates")+ - ggplot2::ggtitle("Neighborhood Deprivation Index\nPopulation-weighted Quintiles (Powell-Wiley, non-imputed)", - subtitle = "Washington, D.C. tracts as the referent") +### Rename '9-NDI not avail' level as NA for plotting +DC_2020_powell_wiley$NDIQuintNA.x <- factor( + replace( + as.character(DC_2020_powell_wiley$NDIQuint.x), + DC_2020_powell_wiley$NDIQuint.x == '9-NDI not avail', + NA + ), + c(levels(DC_2020_powell_wiley$NDIQuint.x)[-6], NA) +) + +ggplot() + + geom_sf( + data = DC_2020_powell_wiley, + aes(fill = NDIQuintNA.x), + color = 'white' + ) + + theme_bw() + + scale_fill_viridis_d( + guide = guide_legend(reverse = TRUE), + na.value = 'grey50' + ) + + labs( + fill = 'Index (Categorical)', + caption = 'Source: U.S. Census ACS 2016-2020 estimates' + ) + + ggtitle( + 'Neighborhood Deprivation Index\n + Population-weighted Quintiles (Powell-Wiley, non-imputed)', + subtitle = 'Washington, D.C. 
tracts as the referent' + ) ``` ![](man/figures/powell_wiley1.png) @@ -277,35 +367,53 @@ ggplot2::ggplot() + ``` r ## Imputed missing tracts (Continuous) -ggplot2::ggplot() + - ggplot2::geom_sf(data = DC2020powell_wiley, - ggplot2::aes(fill = NDI.y), - color = "white") + - ggplot2::theme_bw() + - ggplot2::scale_fill_viridis_c() + - ggplot2::labs(fill = "Index (Continuous)", - caption = "Source: U.S. Census ACS 2016-2020 estimates")+ - ggplot2::ggtitle("Neighborhood Deprivation Index\nContinuous (Powell-Wiley, imputed)", - subtitle = "Washington, D.C. tracts as the referent") +ggplot() + + geom_sf( + data = DC_2020_powell_wiley, + aes(fill = NDI.y), + color = 'white' + ) + + theme_bw() + + scale_fill_viridis_c() + + labs( + fill = 'Index (Continuous)', + caption = 'Source: U.S. Census ACS 2016-2020 estimates' + ) + + ggtitle( + 'Neighborhood Deprivation Index\nContinuous (Powell-Wiley, imputed)', + subtitle = 'Washington, D.C. tracts as the referent' + ) ## Imputed missing tracts (Categorical quintiles) -### Rename "9-NDI not avail" level as NA for plotting -DC2020powell_wiley$NDIQuintNA.y <- factor(replace(as.character(DC2020powell_wiley$NDIQuint.y), - DC2020powell_wiley$NDIQuint.y == "9-NDI not avail", - NA), - c(levels(DC2020powell_wiley$NDIQuint.y)[-6], NA)) - -ggplot2::ggplot() + - ggplot2::geom_sf(data = DC2020powell_wiley, - ggplot2::aes(fill = NDIQuintNA.y), - color = "white") + - ggplot2::theme_bw() + - ggplot2::scale_fill_viridis_d(guide = ggplot2::guide_legend(reverse = TRUE), - na.value = "grey50") + - ggplot2::labs(fill = "Index (Categorical)", - caption = "Source: U.S. Census ACS 2016-2020 estimates")+ - ggplot2::ggtitle("Neighborhood Deprivation Index\nPopulation-weighted Quintiles (Powell-Wiley, imputed)", - subtitle = "Washington, D.C. 
tracts as the referent") +### Rename '9-NDI not avail' level as NA for plotting +DC_2020_powell_wiley$NDIQuintNA.y <- factor( + replace( + as.character(DC_2020_powell_wiley$NDIQuint.y), + DC_2020_powell_wiley$NDIQuint.y == '9-NDI not avail', + NA + ), + c(levels(DC_2020_powell_wiley$NDIQuint.y)[-6], NA) +) + +ggplot() + + geom_sf( + data = DC_2020_powell_wiley, + aes(fill = NDIQuintNA.y), + color = 'white' + ) + + theme_bw() + + scale_fill_viridis_d( + guide = guide_legend(reverse = TRUE), + na.value = 'grey50' + ) + + labs( + fill = 'Index (Categorical)', + caption = 'Source: U.S. Census ACS 2016-2020 estimates' + ) + + ggtitle( + 'Neighborhood Deprivation Index\nPopulation-weighted Quintiles (Powell-Wiley, imputed)', + subtitle = 'Washington, D.C. tracts as the referent' + ) ``` ![](man/figures/powell_wiley3.png) @@ -317,176 +425,251 @@ ggplot2::ggplot() + # --------------------------- # # Merge the two NDI metrics (Messer and Powell-Wiley, imputed) -ndi2020DC <- dplyr::left_join(messer2020DC$ndi, powell_wiley2020DCi$ndi, by = "GEOID", suffix = c(".messer", ".powell_wiley")) +NDI_2020_DC <- messer_2020_DC$ndi %>% + left_join( + powell_wiley_2020_DCi$ndi, + by = 'GEOID', + suffix = c('.messer', '.powell_wiley') + ) -# Check the correlation the two NDI metrics (Messer and Powell-Wiley, imputed) as continuous values -cor(ndi2020DC$NDI.messer, ndi2020DC$NDI.powell_wiley, use = "complete.obs") # Pearsons r = 0.975 +# Check the correlation of two NDI metrics (Messer & Powell-Wiley, imputed) as continuous values +cor(NDI_2020_DC$NDI.messer, NDI_2020_DC$NDI.powell_wiley, use = 'complete.obs') # Pearson's r=0.975 # Check the similarity of the two NDI metrics (Messer and Powell-Wiley, imputed) as quartiles -table(ndi2020DC$NDIQuart, ndi2020DC$NDIQuint) +table(NDI_2020_DC$NDIQuart, NDI_2020_DC$NDIQuint) ``` ``` r -# ------------------- # -# Retrieve Gini Index # -# ------------------- # +# ---------------------------- # +# Retrieve aspatial Gini Index # +# 
---------------------------- # # Gini Index based on Gini (1921) from the ACS-5 -gini2020DC <- gini(state = "DC", year = 2020) - -# Obtain the 2020 census tracts from the "tigris" package -tract2020DC <- tigris::tracts(state = "DC", year = 2020, cb = TRUE) - -# Join the Gini Index values to the census tract geometry -gini2020DC <- dplyr::left_join(tract2020DC, gini2020DC$gini, by = "GEOID") - -ggplot2::ggplot() + - ggplot2::geom_sf(data = gini2020DC, - ggplot2::aes(fill = gini), - color = "white") + - ggplot2::theme_bw() + - ggplot2::scale_fill_viridis_c() + - ggplot2::labs(fill = "Index (Continuous)", - caption = "Source: U.S. Census ACS 2016-2020 estimates")+ - ggplot2::ggtitle("Gini Index\nGrey color denotes no data", - subtitle = "Washington, D.C. tracts") +G_2020_DC <- gini(state = 'DC', year = 2020) + +# Obtain the 2020 census tracts from the 'tigris' package +tract_2020_DC <- tracts(state = 'DC', year = 2020, cb = TRUE) + +# Join the G (Gini) values to the census tract geometry +G_2020_DC <- tract_2020_DC %>% + left_join(G_2020_DC$g, by = 'GEOID') + +ggplot() + + geom_sf( + data = G_2020_DC, + aes(fill = G), + color = 'white' + ) + + theme_bw() + + scale_fill_viridis_c() + + labs( + fill = 'Index (Continuous)', + caption = 'Source: U.S. Census ACS 2016-2020 estimates' + ) + + ggtitle( + 'Gini Index\nGrey color denotes no data', + subtitle = 'Washington, D.C. tracts' + ) ``` -![](man/figures/gini.png) +![](man/figures/g.png) ``` r -# -------------------------------------------- # -# Compute Racial Isoliation Index (Anthopolos) # -# -------------------------------------------- # +# ---------------------------------------------------- # +# Compute spatial Racial Isoliation Index (Anthopolos) # +# ---------------------------------------------------- # # Racial Isolation Index based on Anthopolos et al. 
(2011) ## Selected subgroup: Not Hispanic or Latino, Black or African American alone -ri2020DC <- anthopolos(state = "DC", year = 2020, subgroup = "NHoLB") +RI_2020_DC <- anthopolos(state = 'DC', year = 2020, subgroup = 'NHoLB') -# Obtain the 2020 census tracts from the "tigris" package -tract2020DC <- tigris::tracts(state = "DC", year = 2020, cb = TRUE) +# Obtain the 2020 census tracts from the 'tigris' package +tract_2020_DC <- tracts(state = 'DC', year = 2020, cb = TRUE) # Join the RI (Anthopolos) values to the census tract geometry -ri2020DC <- dplyr::left_join(tract2020DC, ri2020DC$ri, by = "GEOID") - -ggplot2::ggplot() + - ggplot2::geom_sf(data = ri2020DC, - ggplot2::aes(fill = RI), - color = "white") + - ggplot2::theme_bw() + - ggplot2::scale_fill_viridis_c() + - ggplot2::labs(fill = "Index (Continuous)", - caption = "Source: U.S. Census ACS 2016-2020 estimates")+ - ggplot2::ggtitle("Racial Isolation Index\nNot Hispanic or Latino, Black or African American alone (Anthopolos)", - subtitle = "Washington, D.C. tracts (not corrected for edge effects)") +RI_2020_DC <- tract_2020_DC %>% + left_join(RI_2020_DC$ri, by = 'GEOID') + +ggplot() + + geom_sf( + data = RI_2020_DC, + aes(fill = RI), + color = 'white' + ) + + theme_bw() + + scale_fill_viridis_c() + + labs( + fill = 'Index (Continuous)', + caption = 'Source: U.S. Census ACS 2016-2020 estimates' + ) + + ggtitle( + 'Racial Isolation Index\n + Not Hispanic or Latino, Black or African American alone (Anthopolos)', + subtitle = 'Washington, D.C. tracts (not corrected for edge effects)' + ) ``` ![](man/figures/ri.png) ``` r -# -------------------------------------------- # -# Compute Educational Isoliation Index (Bravo) # -# -------------------------------------------- # +# ---------------------------------------------------- # +# Compute spatial Educational Isoliation Index (Bravo) # +# ---------------------------------------------------- # # Educational Isolation Index based on Bravo et al. 
(2021) ## Selected subgroup: without four-year college degree -ei2020DC <- bravo(state = "DC", year = 2020, subgroup = c("LtHS", "HSGiE", "SCoAD")) +EI_2020_DC <- bravo(state = 'DC', year = 2020, subgroup = c('LtHS', 'HSGiE', 'SCoAD')) -# Obtain the 2020 census tracts from the "tigris" package -tract2020DC <- tigris::tracts(state = "DC", year = 2020, cb = TRUE) +# Obtain the 2020 census tracts from the 'tigris' package +tract_2020_DC <- tracts(state = 'DC', year = 2020, cb = TRUE) # Join the EI (Bravo) values to the census tract geometry -ei2020DC <- dplyr::left_join(tract2020DC, ei2020DC$ei, by = "GEOID") - -ggplot2::ggplot() + - ggplot2::geom_sf(data = ei2020DC, - ggplot2::aes(fill = EI), - color = "white") + - ggplot2::theme_bw() + - ggplot2::scale_fill_viridis_c() + - ggplot2::labs(fill = "Index (Continuous)", - caption = "Source: U.S. Census ACS 2016-2020 estimates")+ - ggplot2::ggtitle("Educational Isolation Index\nWithout a four-year college degree (Bravo)", - subtitle = "Washington, D.C. tracts (not corrected for edge effects)") +EI_2020_DC <- tract_2020_DC %>% + left_join(EI_2020_DC$ei, by = 'GEOID') + +ggplot() + + geom_sf( + data = EI_2020_DC, + aes(fill = EI), + color = 'white' + ) + + theme_bw() + + scale_fill_viridis_c() + + labs( + fill = 'Index (Continuous)', + caption = 'Source: U.S. Census ACS 2016-2020 estimates' + )+ + ggtitle( + 'Educational Isolation Index\nWithout a four-year college degree (Bravo)', + subtitle = 'Washington, D.C. 
tracts (not corrected for edge effects)' + ) ``` ![](man/figures/ei.png) ``` r -# ------------------------------------------------ # -# Index of Concentration at the Extremes (Krieger) # -# ------------------------------------------------ # +# ----------------------------------------------------------------- # +# Compute aspatial Index of Concentration at the Extremes (Krieger) # +# ----------------------------------------------------------------- # -# Five Indices of Concentration at the Extremes based on Feldman et al. (2015) and Krieger et al. (2016) +# Five Indices of Concentration at the Extremes based on Feldman et al. (2015) and +# Krieger et al. (2016) -ice2020DC <- krieger(state = "DC", year = 2020) +ICE_2020_DC <- krieger(state = 'DC', year = 2020) -# Obtain the 2020 census tracts from the "tigris" package -tract2020DC <- tigris::tracts(state = "DC", year = 2020, cb = TRUE) +# Obtain the 2020 census tracts from the 'tigris' package +tract_2020_DC <- tracts(state = 'DC', year = 2020, cb = TRUE) # Join the ICEs (Krieger) values to the census tract geometry -ice2020DC <- dplyr::left_join(tract2020DC, ice2020DC$ice, by = "GEOID") +ICE_2020_DC <- tract_2020_DC %>% + left_join(ICE_2020_DC$ice, by = 'GEOID') # Plot ICE for Income -ggplot2::ggplot() + - ggplot2::geom_sf(data = ice2020DC, - ggplot2::aes(fill = ICE_inc), - color = "white") + - ggplot2::theme_bw() + - ggplot2::scale_fill_gradient2(low = "#998ec3", mid = "#f7f7f7", high = "#f1a340", limits = c(-1,1)) + - ggplot2::labs(fill = "Index (Continuous)", - caption = "Source: U.S. Census ACS 2016-2020 estimates")+ - ggplot2::ggtitle("Index of Concentration at the Extremes\nIncome (Krieger)", - subtitle = "80th income percentile vs. 
20th income percentile") +ggplot() + + geom_sf( + data = ICE_2020_DC, + aes(fill = ICE_inc), + color = 'white' + ) + + theme_bw() + + scale_fill_gradient2( + low = '#998ec3', + mid = '#f7f7f7', + high = '#f1a340', + limits = c(-1, 1) + ) + + labs( + fill = 'Index (Continuous)', + caption = 'Source: U.S. Census ACS 2016-2020 estimates' + ) + + ggtitle( + 'Index of Concentration at the Extremes\nIncome (Krieger)', + subtitle = '80th income percentile vs. 20th income percentile' + ) ``` ![](man/figures/ice1.png) ```r # Plot ICE for Education -ggplot2::ggplot() + - ggplot2::geom_sf(data = ice2020DC, - ggplot2::aes(fill = ICE_edu), - color = "white") + - ggplot2::theme_bw() + - ggplot2::scale_fill_gradient2(low = "#998ec3", mid = "#f7f7f7", high = "#f1a340", limits = c(-1,1)) + - ggplot2::labs(fill = "Index (Continuous)", - caption = "Source: U.S. Census ACS 2016-2020 estimates")+ - ggplot2::ggtitle("Index of Concentration at the Extremes\nEducation (Krieger)", - subtitle = "less than high school vs. four-year college degree or more") +ggplot() + + geom_sf( + data = ICE_2020_DC, + aes(fill = ICE_edu), + color = 'white' + ) + + theme_bw() + + scale_fill_gradient2( + low = '#998ec3', + mid = '#f7f7f7', + high = '#f1a340', + limits = c(-1, 1) + ) + + labs( + fill = 'Index (Continuous)', + caption = 'Source: U.S. Census ACS 2016-2020 estimates' + ) + + ggtitle( + 'Index of Concentration at the Extremes\nEducation (Krieger)', + subtitle = 'less than high school vs. four-year college degree or more' + ) ``` ![](man/figures/ice2.png) ```r # Plot ICE for Race/Ethnicity -ggplot2::ggplot() + - ggplot2::geom_sf(data = ice2020DC, - ggplot2::aes(fill = ICE_rewb), - color = "white") + - ggplot2::theme_bw() + - ggplot2::scale_fill_gradient2(low = "#998ec3", mid = "#f7f7f7", high = "#f1a340", limits = c(-1, 1)) + - ggplot2::labs(fill = "Index (Continuous)", - caption = "Source: U.S. 
Census ACS 2016-2020 estimates")+ - ggplot2::ggtitle("Index of Concentration at the Extremes\nRace/Ethnicity (Krieger)", - subtitle = "white non-Hispanic vs. black non-Hispanic") +ggplot() + + geom_sf( + data = ICE_2020_DC, + aes(fill = ICE_rewb), + color = 'white' + ) + + theme_bw() + + scale_fill_gradient2( + low = '#998ec3', + mid = '#f7f7f7', + high = '#f1a340', + limits = c(-1, 1) + ) + + labs( + fill = 'Index (Continuous)', + caption = 'Source: U.S. Census ACS 2016-2020 estimates' + ) + + ggtitle( + 'Index of Concentration at the Extremes\nRace/Ethnicity (Krieger)', + subtitle = 'white non-Hispanic vs. black non-Hispanic' + ) ``` ![](man/figures/ice3.png) ``` # Plot ICE for Income and Race/Ethnicity Combined -## white non-Hispanic in 80th income percentile vs. black (including Hispanic) in 20th income percentile -ggplot2::ggplot() + - ggplot2::geom_sf(data = ice2020DC, - ggplot2::aes(fill = ICE_wbinc), - color = "white") + - ggplot2::theme_bw() + - ggplot2::scale_fill_gradient2(low = "#998ec3", mid = "#f7f7f7", high = "#f1a340", limits = c(-1, 1)) + - ggplot2::labs(fill = "Index (Continuous)", - caption = "Source: U.S. Census ACS 2016-2020 estimates")+ - ggplot2::ggtitle("Index of Concentration at the Extremes\nIncome and race/ethnicity combined (Krieger)", - subtitle = "white non-Hispanic in 80th income percentile vs. black (incl. Hispanic) in 20th inc. percentile") +## white non-Hispanic in 80th income percentile vs. +## black (including Hispanic) in 20th income percentile +ggplot() + + geom_sf( + data = ICE_2020_DC, + aes(fill = ICE_wbinc), + color = 'white' + ) + + theme_bw() + + scale_fill_gradient2( + low = '#998ec3', + mid = '#f7f7f7', + high = '#f1a340', + limits = c(-1, 1) + ) + + labs( + fill = 'Index (Continuous)', + caption = 'Source: U.S. Census ACS 2016-2020 estimates' + ) + + ggtitle( + 'Index of Concentration at the Extremes\nIncome and race/ethnicity combined (Krieger)', + subtitle = 'white non-Hispanic in 80th income percentile vs. 
+ black (incl. Hispanic) in 20th inc. percentile' + ) ``` ![](man/figures/ice4.png) @@ -494,98 +677,472 @@ ggplot2::ggplot() + ```r # Plot ICE for Income and Race/Ethnicity Combined ## white non-Hispanic in 80th income percentile vs. white non-Hispanic in 20th income percentile -ggplot2::ggplot() + - ggplot2::geom_sf(data = ice2020DC, - ggplot2::aes(fill = ICE_wpcinc), - color = "white") + - ggplot2::theme_bw() + - ggplot2::scale_fill_gradient2(low = "#998ec3", mid = "#f7f7f7", high = "#f1a340", limits = c(-1, 1)) + - ggplot2::labs(fill = "Index (Continuous)", - caption = "Source: U.S. Census ACS 2016-2020 estimates")+ - ggplot2::ggtitle("Index of Concentration at the Extremes\nIncome and race/ethnicity combined (Krieger)", - subtitle = "white non-Hispanic in 80th income percentile vs. white non-Hispanic in 20th income percentile") +ggplot() + + geom_sf( + data = ICE_2020_DC, + aes(fill = ICE_wpcinc), + color = 'white' + ) + + theme_bw() + + scale_fill_gradient2( + low = '#998ec3', + mid = '#f7f7f7', + high = '#f1a340', + limits = c(-1, 1) + ) + + labs( + fill = 'Index (Continuous)', + caption = 'Source: U.S. Census ACS 2016-2020 estimates' + ) + + ggtitle( + 'Index of Concentration at the Extremes\nIncome and race/ethnicity combined (Krieger)', + subtitle = 'white non-Hispanic in 80th income percentile vs. 
+ white non-Hispanic in 20th income percentile' + ) ``` ![](man/figures/ice5.png) ```r -# -------------------------------------------------- # -# Compute racial/ethnic Dissimilarity Index (Duncan) # -# -------------------------------------------------- # +# -------------------------------------------------------------------- # +# Compute aspatial racial/ethnic Dissimilarity Index (Duncan & Duncan) # +# -------------------------------------------------------------------- # # Dissimilarity Index based on Duncan & Duncan (1955) ## Selected subgroup comparison: Not Hispanic or Latino, Black or African American alone ## Selected subgroup reference: Not Hispanic or Latino, white alone ## Selected large geography: census tract ## Selected small geography: census block group -di2020DC <- duncan(geo_large = "tract", geo_small = "block group", - state = "DC", year = 2020, - subgroup = "NHoLB", subgroup_ref = "NHoLW") - -# Obtain the 2020 census tracts from the "tigris" package -tract2020DC <- tigris::tracts(state = "DC", year = 2020, cb = TRUE) - -# Join the DI (Duncan) values to the census tract geometry -di2020DC <- dplyr::left_join(tract2020DC, di2020DC$di, by = "GEOID") - -ggplot2::ggplot() + - ggplot2::geom_sf(data = di2020DC, - ggplot2::aes(fill = DI), - color = "white") + - ggplot2::theme_bw() + - ggplot2::scale_fill_viridis_c(limits = c(0, 1)) + - ggplot2::labs(fill = "Index (Continuous)", - caption = "Source: U.S. Census ACS 2016-2020 estimates")+ - ggplot2::ggtitle("Dissimilarity Index (Duncan)\nWashington, D.C. census block groups to tracts", - subtitle = "Black non-Hispanic vs. 
white non-Hispanic") +D_2020_DC <- duncan( + geo_large = 'tract', + geo_small = 'block group', + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + subgroup_ref = 'NHoLW' +) + +# Obtain the 2020 census tracts from the 'tigris' package +tract_2020_DC <- tracts(state = 'DC', year = 2020, cb = TRUE) + +# Join the D (Duncan & Duncan) values to the census tract geometry +D_2020_DC <- tract_2020_DC %>% + left_join(D_2020_DC$d, by = 'GEOID') + +ggplot() + + geom_sf( + data = D_2020_DC, + aes(fill = D), + color = 'white' + ) + + theme_bw() + + scale_fill_viridis_c(limits = c(0, 1)) + + labs( + fill = 'Index (Continuous)', + caption = 'Source: U.S. Census ACS 2016-2020 estimates' + ) + + ggtitle( + 'Dissimilarity Index (Duncan & Duncan)\n + Washington, D.C. census block groups to tracts', + subtitle = 'Black non-Hispanic vs. white non-Hispanic' + ) ``` -![](man/figures/di.png) +![](man/figures/d.png) ```r -# ----------------------------------------------- # -# Compute racial/ethnic Atkinson Index (Atkinson) # -# ----------------------------------------------- # +# -------------------------------------------------------- # +# Compute aspatial racial/ethnic Atkinson Index (Atkinson) # +# -------------------------------------------------------- # # Atkinson Index based on Atkinson (1970) ## Selected subgroup: Not Hispanic or Latino, Black or African American alone ## Selected large geography: census tract ## Selected small geography: census block group ## Default epsilon (0.5 or over- and under-representation contribute equally) -ai2020DC <- atkinson(geo_large = "tract", geo_small = "block group", - state = "DC", year = 2020, subgroup = "NHoLB") - -# Obtain the 2020 census tracts from the "tigris" package -tract2020DC <- tigris::tracts(state = "DC", year = 2020, cb = TRUE) - -# Join the DI (Duncan) values to the census tract geometry -ai2020DC <- dplyr::left_join(tract2020DC, ai2020DC$ai, by = "GEOID") - -ggplot2::ggplot() + - ggplot2::geom_sf(data = ai2020DC, - 
ggplot2::aes(fill = AI), - color = "white") + - ggplot2::theme_bw() + - ggplot2::scale_fill_viridis_c(limits = c(0, 1)) + - ggplot2::labs(fill = "Index (Continuous)", - caption = "Source: U.S. Census ACS 2016-2020 estimates") + - ggplot2::ggtitle("Atkinson Index (Atkinson)\nWashington, D.C. census block groups to tracts", - subtitle = expression(paste("Black non-Hispanic (", epsilon, " = 0.5)"))) +A_2020_DC <- atkinson( + geo_large = 'tract', + geo_small = 'block group', + state = 'DC', + year = 2020, + subgroup = 'NHoLB' +) + +# Obtain the 2020 census tracts from the 'tigris' package +tract_2020_DC <- tracts(state = 'DC', year = 2020, cb = TRUE) + +# Join the A (Atkinson) values to the census tract geometry +A_2020_DC <- tract_2020_DC %>% + left_join(A_2020_DC$a, by = 'GEOID') + +ggplot() + + geom_sf( + data = A_2020_DC, + aes(fill = A), + color = 'white' + ) + + theme_bw() + + scale_fill_viridis_c(limits = c(0, 1)) + + labs( + fill = 'Index (Continuous)', + caption = 'Source: U.S. Census ACS 2016-2020 estimates' + ) + + ggtitle( + 'Atkinson Index (Atkinson)\n + Washington, D.C.
census block groups to tracts', + subtitle = expression(paste('Black non-Hispanic (', epsilon, ' = 0.5)')) + ) +``` + +![](man/figures/a.png) + +```r +# ------------------------------------------------------- # +# Compute aspatial racial/ethnic Interaction Index (Bell) # +# ------------------------------------------------------- # + +# Interaction Index based on Shevky & Williams (1949; ISBN-13:978-0-837-15637-8) and Bell (1954) +## Selected subgroup: Not Hispanic or Latino, Black or African American alone +## Selected interaction subgroup: Not Hispanic or Latino, white alone +## Selected large geography: census tract +## Selected small geography: census block group +xPy_star_2020_DC <- bell( + geo_large = 'tract', + geo_small = 'block group', + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + subgroup_ixn = 'NHoLW' +) + +# Obtain the 2020 census tracts from the 'tigris' package +tract_2020_DC <- tracts(state = 'DC', year = 2020, cb = TRUE) + +# Join the xPy* (Bell) values to the census tract geometry +xPy_star_2020_DC <- tract_2020_DC %>% + left_join(xPy_star_2020_DC$xpy_star, by = 'GEOID') + +ggplot() + + geom_sf( + data = xPy_star_2020_DC, + aes(fill = xPy_star), + color = 'white' + ) + + theme_bw() + + scale_fill_viridis_c(limits = c(0, 1)) + + labs( + fill = 'Index (Continuous)', + caption = 'Source: U.S. Census ACS 2016-2020 estimates' + ) + + ggtitle( + 'Interaction Index (Bell)\n + Washington, D.C. census block groups to tracts', + subtitle = 'Black non-Hispanic vs.
white non-Hispanic' + ) +``` + +![](man/figures/xpy_star.png) + +```r +# -------------------------------------------------------- # +# Compute aspatial racial/ethnic Correlation Ratio (White) # +# -------------------------------------------------------- # + +# Correlation Ratio based on Bell (1954) and White (1986) +## Selected subgroup: Not Hispanic or Latino, Black or African American alone +## Selected large geography: census tract +## Selected small geography: census block group +V_2020_DC <- white( + geo_large = 'tract', + geo_small = 'block group', + state = 'DC', + year = 2020, + subgroup = 'NHoLB' +) + +# Obtain the 2020 census tracts from the 'tigris' package +tract_2020_DC <- tracts(state = 'DC', year = 2020, cb = TRUE) + +# Join the V (White) values to the census tract geometry +V_2020_DC <- tract_2020_DC %>% + left_join(V_2020_DC$v, by = 'GEOID') + +ggplot() + + geom_sf( + data = V_2020_DC, + aes(fill = V), + color = 'white' + ) + + theme_bw() + + scale_fill_gradient2( + low = '#998ec3', + mid = '#f7f7f7', + high = '#f1a340', + midpoint = 0 + ) + + labs( + fill = 'Index (Continuous)', + caption = 'Source: U.S. Census ACS 2016-2020 estimates' + ) + + ggtitle( + 'Correlation Ratio (White)\n + Washington, D.C. 
census block groups to tracts', + subtitle = 'Black non-Hispanic' + ) + ggsave('man/figures/v.png', width = 7, height = 7) +``` + +![](man/figures/v.png) + +```r +# --------------------------------------------------------- # +# Compute aspatial racial/ethnic Location Quotient (Sudano) # +# --------------------------------------------------------- # + +# Location Quotient based on Merton (1938) and Sudano (2013) +## Selected subgroup: Not Hispanic or Latino, Black or African American alone +## Selected large geography: state +## Selected small geography: census tract +LQ_2020_DC <- sudano( + geo_large = 'state', + geo_small = 'tract', + state = 'DC', + year = 2020, + subgroup = 'NHoLB' +) + +# Obtain the 2020 census tracts from the 'tigris' package +tract_2020_DC <- tracts(state = 'DC', year = 2020, cb = TRUE) + +# Join the LQ (Sudano) values to the census tract geometry +LQ_2020_DC <- tract_2020_DC %>% + left_join(LQ_2020_DC$lq, by = 'GEOID') + +ggplot() + + geom_sf( + data = LQ_2020_DC, + aes(fill = LQ), + color = 'white' + ) + + theme_bw() + + scale_fill_viridis_c() + + labs( + fill = 'Index (Continuous)', + caption = 'Source: U.S. Census ACS 2016-2020 estimates' + ) + + ggtitle( + 'Location Quotient (Sudano)\n + Washington, D.C. census tracts vs. 
"state"', + subtitle = 'Black non-Hispanic' + ) +``` + +![](man/figures/lq.png) + +```r +# ------------------------------------------------------------------------------------- # +# Compute aspatial racial/ethnic Local Exposure and Isolation (Bemanian & Beyer) metric # +# ------------------------------------------------------------------------------------- # + +# Local Exposure and Isolation metric based on Bemanian & Beyer (2017) +## Selected subgroup: Not Hispanic or Latino, Black or African American alone +## Selected interaction subgroup: Not Hispanic or Latino, Black or African American alone +## Selected large geography: state +## Selected small geography: census tract +LExIs_2020_DC <- bemanian_beyer( + geo_large = 'state', + geo_small = 'tract', + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + subgroup_ixn = 'NHoLW' +) + +# Obtain the 2020 census tracts from the 'tigris' package +tract_2020_DC <- tracts(state = 'DC', year = 2020, cb = TRUE) + +# Join the LEx/Is (Bemanian & Beyer) values to the census tract geometry +LExIs_2020_DC <- tract_2020_DC %>% + left_join(LExIs_2020_DC$lexis, by = 'GEOID') + +ggplot() + + geom_sf( + data = LExIs_2020_DC, + aes(fill = LExIs), + color = 'white' + ) + + theme_bw() + + scale_fill_gradient2( + low = '#998ec3', + mid = '#f7f7f7', + high = '#f1a340', + midpoint = 0 + ) + + labs( + fill = 'Index (Continuous)', + caption = 'Source: U.S. Census ACS 2016-2020 estimates' + ) + + ggtitle( + 'Local Exposure and Isolation (Bemanian & Beyer) metric\n + Washington, D.C. census block groups to tracts', + subtitle = 'Black non-Hispanic vs. white non-Hispanic' + ) +``` + +![](man/figures/lexis.png) + +```r +# --------------------------------------------- # +# Compute aspatial racial/ethnic Delta (Hoover) # +# --------------------------------------------- # + +# Delta based on Hoover (1941) and Duncan et al. 
(1961) +## Selected subgroup: Not Hispanic or Latino, Black or African American alone +## Selected large geography: census tract +## Selected small geography: census block group +DEL_2020_DC <- hoover( + geo_large = 'tract', + geo_small = 'block group', + state = 'DC', + year = 2020, + subgroup = 'NHoLB' +) + +# Obtain the 2020 census tracts from the 'tigris' package +tract_2020_DC <- tracts(state = 'DC', year = 2020, cb = TRUE) + +# Join the DEL (Hoover) values to the census tract geometry +DEL_2020_DC <- tract_2020_DC %>% + left_join(DEL_2020_DC$del, by = 'GEOID') + +ggplot() + + geom_sf( + data = DEL_2020_DC, + aes(fill = DEL), + color = 'white' + ) + + theme_bw() + + scale_fill_viridis_c(limits = c(0, 1)) + + labs( + fill = 'Index (Continuous)', + caption = 'Source: U.S. Census ACS 2016-2020 estimates' + ) + + ggtitle( + 'Delta (Hoover)\n + Washington, D.C. census block groups to tracts', + subtitle = 'Black non-Hispanic' + ) +``` + +![](man/figures/del.png) + +```r +# --------------------------------------------- # +# Compute an index of spatial proximity (White) # +# --------------------------------------------- # + +# An index of spatial proximity based on White (1986) & Blau (1977) +## Selected subgroup: Not Hispanic or Latino, Black or African American alone +## Selected large geography: census tract +## Selected small geography: census block group +SP_2020_DC <- white_blau( + geo_large = 'tract', + geo_small = 'block group', + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + subgroup_ref = 'NHoLW' +) + +# Obtain the 2020 census tracts from the 'tigris' package +tract_2020_DC <- tracts(state = 'DC', year = 2020, cb = TRUE) + +# Join the SP (White) values to the census tract geometry +SP_2020_DC <- tract_2020_DC %>% + left_join(SP_2020_DC$sp, by = 'GEOID') + +ggplot() + + geom_sf( + data = SP_2020_DC, + aes(fill = SP), + color = 'white' + ) + + theme_bw() + + scale_fill_gradient2( + low = '#998ec3', + mid = '#f7f7f7', + high = '#f1a340', + midpoint = 1 
+ ) + + labs( + fill = 'Index (Continuous)', + caption = 'Source: U.S. Census ACS 2016-2020 estimates' + ) + + ggtitle( + 'An index of spatial proximity (White)\n + Washington, D.C. census block groups to tracts', + subtitle = 'Black non-Hispanic vs. white non-Hispanic' + ) +``` + +![](man/figures/sp.png) + +```r +# ---------------------------------------------------------- # +# Compute aspatial racial/ethnic Isolation Index (Lieberson) # +# ---------------------------------------------------------- # + +# Isolation Index based on Lieberson (1981; ISBN-13:978-1-032-53884-6) and Bell (1954) +## Selected subgroup: Not Hispanic or Latino, Black or African American alone +## Selected large geography: census tract +## Selected small geography: census block group +xPx_star_2020_DC <- lieberson( + geo_large = 'tract', + geo_small = 'block group', + state = 'DC', + year = 2020, + subgroup = 'NHoLB' +) + +# Obtain the 2020 census tracts from the 'tigris' package +tract_2020_DC <- tracts(state = 'DC', year = 2020, cb = TRUE) + +# Join the xPx* (Lieberson) values to the census tract geometry +xPx_star_2020_DC <- tract_2020_DC %>% + left_join(xPx_star_2020_DC$xpx_star, by = 'GEOID') + +ggplot() + + geom_sf( + data = xPx_star_2020_DC, + aes(fill = xPx_star), + color = 'white' + ) + + theme_bw() + + scale_fill_viridis_c(limits = c(0, 1)) + + labs( + fill = 'Index (Continuous)', + caption = 'Source: U.S. Census ACS 2016-2020 estimates' + ) + + ggtitle( + 'Isolation Index (Lieberson)\n + Washington, D.C. census block groups to tracts', + subtitle = 'Black non-Hispanic' + ) ``` -![](man/figures/ai.png) +![](man/figures/xpx_star.png) ### Funding -This package was originally developed while the author was a postdoctoral fellow supported by the [Cancer Prevention Fellowship Program](https://cpfp.cancer.gov) at the [National Cancer Institute](https://www.cancer.gov).
Any modifications since December 05, 2022 were made while the author was an employee of Social & Scientific Systems, Inc., a division of [DLH Corporation](https://www.dlhcorp.com). +This package was originally developed while the author was a postdoctoral fellow supported by the [Cancer Prevention Fellowship Program](https://cpfp.cancer.gov) at the [National Cancer Institute](https://www.cancer.gov). Any modifications since December 05, 2022 were made while the author was an employee of [DLH, LLC](https://www.dlhcorp.com) (formerly Social & Scientific Systems, Inc.). ### Acknowledgments -The `messer()` function functionalizes the code found in [Hruska et al. (2022)](https://doi.org/10.1016/j.janxdis.2022.102529) available on an [OSF repository](https://doi.org/10.17605/OSF.IO/M2SAV), but with percent with income less than $30K added to the computation based on [Messer et al. (2006)](https://doi.org/10.1007/s11524-006-9094-x). The `messer()` function also allows for the computation of NDI (Messer) for each year between 2010-2020 (when the U.S. census characteristics are available to date). There was no code companion to compute NDI (Powell-Wiley) included in [Andrews et al. (2020)](https://doi.org/10.1080/17445647.2020.1750066) or [Slotman et al. (2022)](https://doi.org/10.1016/j.dib.2022.108002), but the package author worked directly with the latter manuscript authors to replicate their `SAS` code in `R` for the `powell_wiley()` function. Please note: the NDI (Powell-Wiley) values will not exactly match (but will highly correlate with) those found in [Andrews et al. (2020)](https://doi.org/10.1080/17445647.2020.1750066) and [Slotman et al. (2022)](https://doi.org/10.1016/j.dib.2022.108002) because the two studies used a different statistical platform (i.e., `SPSS` and `SAS`, respectively) that intrinsically calculate the principal component analysis differently from `R`. 
The internal function to calculate the Atkinson Index is based on the `Atkinson()` function in the [DescTools](https://cran.r-project.org/package=DescTools) package. +The [`messer()`](R/messer.R) function functionalizes the code found in [Hruska et al. (2022)](https://doi.org/10.1016/j.janxdis.2022.102529) available on an [OSF repository](https://doi.org/10.17605/OSF.IO/M2SAV), but with percent with income less than $30K added to the computation based on [Messer et al. (2006)](https://doi.org/10.1007/s11524-006-9094-x). The [`messer()`](R/messer.R) function also allows for the computation of *NDI* (Messer) for each year between 2010-2020 (when the U.S. census characteristics are available to date). There was no code companion to compute *NDI* (Powell-Wiley) included in [Andrews et al. (2020)](https://doi.org/10.1080/17445647.2020.1750066) or [Slotman et al. (2022)](https://doi.org/10.1016/j.dib.2022.108002) only a [description](https://www.gis.cancer.gov/research/NeighDeprvIndex_Methods.pdf), but the package author worked directly with the latter manuscript authors to replicate their [*SAS*](https://www.sas.com) code in [**R**](https://cran.r-project.org/) for the [`powell_wiley()`](R/powell_wiley.R) function. See the Accumulating Data to Optimally Predict Obesity Treatment [(ADOPT)](https://gis.cancer.gov/research/adopt.html) Core Measures Project for more details. Please note: the *NDI* (Powell-Wiley) values will not exactly match (but will highly correlate with) those found in [Andrews et al. (2020)](https://doi.org/10.1080/17445647.2020.1750066) and [Slotman et al. (2022)](https://doi.org/10.1016/j.dib.2022.108002) because the two studies used a different statistical platform (i.e., [*SPSS*](https://www.ibm.com/spss) and [*SAS*](https://www.sas.com), respectively) that intrinsically calculate the principal component analysis differently from [**R**](https://cran.r-project.org/). 
The internal function to calculate the Atkinson Index is based on the `Atkinson()` function in the [*DescTools*](https://cran.r-project.org/package=DescTools) package. When citing this package for publication, please follow: - citation("ndi") + citation('ndi') ### Questions? Feedback? diff --git a/cran-comments.md b/cran-comments.md index 7b877c0..4abd0c8 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,15 +1,23 @@ -## This is the fifth resubmission +## This is the seventh resubmission * Actions taken since previous submission: - * Added `atkinson()` function to compute the Atkinson Index (AI) based on [Atkinson (1970)](https://doi.org/10.1016/0022-0531(70)90039-6) for specified counties/tracts 2009 onward - * Fixed bug in reverse dependency check failure for `anthopolos()` and `bravo()` functions removing `returnValue()` when data are not missing - * Thank you, [Roger Bivand](https://github.com/rsbivand), for the catch. Relates to [ndi Issue #5](https://github.com/idblr/ndi/issues/5) - * Updated `duncan()`, `gini()`, `krieger()`, `messer()`, and `powell_wiley()` for consistency in messaging when data are not missing - * Updated tests for `anthopolos()` and `bravo()` if `Sys.getenv("CENSUS_API_KEY") != ""` - * Added `omit_NAs` argument in `duncan()` function to choose if NA values will be included in its computation - * In `duncan()` function, if any smaller geographic unit has zero counts the output for its larger geographic unit will be NA - * Fixed bug in `duncan()` function for multiple `subgroup` and `subgroup_ref` selections - * Updated documentation throughout + * Added `hoover()` function to compute the aspatial racial/ethnic Delta (*DEL*) based on [Hoover (1941)](https://doi.org/10.1017/S0022050700052980) and Duncan et al.
(1961; LC:60007089) + * Added `white_blau()` function to compute an index of spatial proximity (*SP*) based on [White (1986)](https://doi.org/10.2307/3644339) and Blau (1977; ISBN-13:978-0-029-03660-0) + * Added `lieberson()` function to compute the aspatial racial/ethnic Isolation Index (_xPx\*_) based on Lieberson (1981; ISBN-13:978-1-032-53884-6) and Bell (1954) + * Added `geo_large = 'cbsa'` for Core Based Statistical Areas, `geo_large = 'csa'` for Combined Statistical Areas, and `geo_large = 'metro'` for Metropolitan Divisions as the larger geographical unit in `atkinson()`, `bell()`, `bemanian_beyer()`, `duncan()`, `hoover()`, `sudano()`, `white()`, and `white_blau()` functions. + * Thank you for the feature suggestions, [Symielle Gaston](https://orcid.org/0000-0001-9495-1592) + * `bell()` function computes the Interaction Index (Bell) not the Isolation Index as previously documented. Updated documentation throughout + * Fixed bug in `bell()`, `bemanian_beyer()`, `duncan()`, `sudano()`, and `white()` functions when a smaller geography contains n=0 total population, will assign a value of zero (0) in the internal calculation instead of NA + * Renamed *AI* as *A*, *DI* as *D*, *Gini* as *G*, and *II* as _xPy\*_ to align with the definitions from [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281). The output for `atkinson()` now produces `a` instead of `ai`. The output for `duncan()` now produces `d` instead of `di`. The output for `gini()` now produces `g` instead of `gini`. The output for `bell()` now produces `xPy_star` instead of `II`. The internal functions `ai_fun()`, `di_fun()` and `ii_fun()` were renamed `a_fun()`, `d_fun()` and `xpy_star_fun()`, respectively. + * `tigris` and `units` are now Imports + * 'package.R' deprecated.
Replaced with 'ndi-package.R' + * Re-formatted code and documentation throughout for consistent readability + * Updated documentation about value range of *V* (White) from `{0 to 1}` to `{-Inf to Inf}` + * Add examples for `hoover()` and `white_blau()` in vignette and README + * Reformatted functions for consistent internal structure + * Updated examples in vignette to showcase a larger variety of U.S. states + * Updated examples in functions to better describe the metrics + * Updated documentation formatting of metric names in all functions * Documentation for DESCRIPTION, README, NEWS, and vignette references the following DOIs, which throws a NOTE but are a valid URL: * @@ -17,11 +25,13 @@ * * * + * + * -* Some tests and examples for `anthopolos()`, `atkinson()`, `bravo()`, `duncan()`, `gini()`, `krieger()`, `messer()`, and `powell_wiley()` functions require a Census API key so they are skipped if NULL or not run +* Some tests and examples for `anthopolos()`, `atkinson()`, `bell()`, `bemanian_beyer()`, `bravo()`, `duncan()`, `gini()`, `hoover()`, `krieger()`, `messer()`, `powell_wiley()`, `sudano()`, and `white()` functions require a Census API key so they are skipped if NULL or not run ## Test environments -* local Windows install, R 4.2.1 +* local Windows install, R 4.4.1 * win-builder, (devel, release, oldrelease) * Rhub * Fedora Linux, R-devel, clang, gfortran diff --git a/data-raw/get_DCtracts2020.R b/data-raw/get_DCtracts2020.R index cc25284..bb881c1 100644 --- a/data-raw/get_DCtracts2020.R +++ b/data-raw/get_DCtracts2020.R @@ -1,147 +1,216 @@ -# code to prepare `DCtracts2020` +# ----------------------------------------------------------------------------------------------- # +# Code to prepare `DCtracts2020` +# ----------------------------------------------------------------------------------------------- # +# +# Created by: Ian Buller, Ph.D., M.A. 
(GitHub: @idblr) +# Created on: 2022-07-23 +# +# Recently modified by: @idblr +# Recently modified on: 2024-07-06 +# +# Notes: +# A) 2024-07-06 (@idblr): Re-formatted +# ----------------------------------------------------------------------------------------------- # # ------------------ # -# Necessary packages # +# NECESSARY PACKAGES # # ------------------ # -library(dplyr) -library(tidycensus) -library(usethis) +loadedPackages <- c('dplyr', 'tidycensus', 'usethis') +suppressMessages(invisible(lapply(loadedPackages, library, character.only = TRUE))) # -------- # -# Settings # +# SETTINGS # # -------- # ## Access Key for census data download ### Obtain one at http://api.census.gov/data/key_signup.html -tidycensus::census_api_key("...") # INSERT YOUR OWN KEY FROM U.S. CENSUS API +census_api_key('...') # INSERT YOUR OWN KEY FROM U.S. CENSUS API # ---------------- # -# Data preparation # +# DATA PREPARATION # # ---------------- # # U.S. Census Bureau American Community Survey (ACS) 5-year variables ## For NDI (Messer) ### ACS-5 variables -messer_vars <- c(PctMenMgmtBusScArti_num1 = "C24030_018", PctMenMgmtBusScArti_num2 = "C24030_019", - PctMenMgmtBusScArti_den = "C24030_002", - PctCrwdHH_num1 = "B25014_005", PctCrwdHH_num2 = "B25014_006", - PctCrwdHH_num3 = "B25014_007", PctCrwdHH_num4 = "B25014_011", - PctCrwdHH_num5 = "B25014_012", PctCrwdHH_num6 = "B25014_013", - PctCrwdHH_den = "B25014_001", - PctHHPov_num = "B17017_002", PctHHPov_den = "B17017_001", - PctFemHeadKids_num1 = "B25115_012", PctFemHeadKids_num2 = "B25115_025", - PctFemHeadKids_den = "B25115_001", - PctPubAsst_num = "B19058_002", PctPubAsst_den = "B19058_001", - PctHHUnder30K_num1 = "B19001_002", PctHHUnder30K_num2 = "B19001_003", - PctHHUnder30K_num3 = "B19001_004", PctHHUnder30K_num4 = "B19001_005", - PctHHUnder30K_num5 = "B19001_006", PctHHUnder30K_den = "B19001_001", - PctEducLessThanHS_num = "B06009_002", PctEducLessThanHS_den = "B06009_001", - PctUnemp_num = "B23025_005", PctUnemp_den = 
"B23025_003") +messer_vars <- c( + PctMenMgmtBusScArti_num1 = 'C24030_018', + PctMenMgmtBusScArti_num2 = 'C24030_019', + PctMenMgmtBusScArti_den = 'C24030_002', + PctCrwdHH_num1 = 'B25014_005', + PctCrwdHH_num2 = 'B25014_006', + PctCrwdHH_num3 = 'B25014_007', + PctCrwdHH_num4 = 'B25014_011', + PctCrwdHH_num5 = 'B25014_012', + PctCrwdHH_num6 = 'B25014_013', + PctCrwdHH_den = 'B25014_001', + PctHHPov_num = 'B17017_002', + PctHHPov_den = 'B17017_001', + PctFemHeadKids_num1 = 'B25115_012', + PctFemHeadKids_num2 = 'B25115_025', + PctFemHeadKids_den = 'B25115_001', + PctPubAsst_num = 'B19058_002', + PctPubAsst_den = 'B19058_001', + PctHHUnder30K_num1 = 'B19001_002', + PctHHUnder30K_num2 = 'B19001_003', + PctHHUnder30K_num3 = 'B19001_004', + PctHHUnder30K_num4 = 'B19001_005', + PctHHUnder30K_num5 = 'B19001_006', + PctHHUnder30K_den = 'B19001_001', + PctEducLessThanHS_num = 'B06009_002', + PctEducLessThanHS_den = 'B06009_001', + PctUnemp_num = 'B23025_005', + PctUnemp_den = 'B23025_003' +) ### Obtain ACS-5 data for DC tracts in 2020 -DCtracts2020messer <- tidycensus::get_acs(geography = "tract", - year = 2020, - output = "wide", - variables = messer_vars, - state = "DC") +DCtracts2020messer <- get_acs( + geography = 'tract', + year = 2020, + output = 'wide', + variables = messer_vars, + state = 'DC' +) ### Format ACS-5 data for NDI (Messer) of DC tracts in 2020 DCtracts2020messer <- DCtracts2020messer[ , -2] # omit NAME feature (column) DCtracts2020messer <- DCtracts2020messer %>% - dplyr::mutate(OCC = (PctMenMgmtBusScArti_num1E + PctMenMgmtBusScArti_num2E) / PctMenMgmtBusScArti_denE, - CWD = (PctCrwdHH_num1E + PctCrwdHH_num2E + PctCrwdHH_num3E + - PctCrwdHH_num4E + PctCrwdHH_num5E + PctCrwdHH_num6E) / PctCrwdHH_denE, - POV = PctHHPov_numE / PctHHPov_denE, - FHH = (PctFemHeadKids_num1E + PctFemHeadKids_num2E) / PctFemHeadKids_denE, - PUB = PctPubAsst_numE / PctPubAsst_denE, - U30 = (PctHHUnder30K_num1E + PctHHUnder30K_num2E + PctHHUnder30K_num3E + - PctHHUnder30K_num4E + 
PctHHUnder30K_num5E) / PctHHUnder30K_denE, - EDU = PctEducLessThanHS_numE / PctEducLessThanHS_denE, - EMP = PctUnemp_numE / PctUnemp_denE) + mutate( + OCC = (PctMenMgmtBusScArti_num1E + PctMenMgmtBusScArti_num2E) / PctMenMgmtBusScArti_denE, + CWD = ( + PctCrwdHH_num1E + PctCrwdHH_num2E + PctCrwdHH_num3E + + PctCrwdHH_num4E + PctCrwdHH_num5E + PctCrwdHH_num6E + ) / PctCrwdHH_denE, + POV = PctHHPov_numE / PctHHPov_denE, + FHH = (PctFemHeadKids_num1E + PctFemHeadKids_num2E) / PctFemHeadKids_denE, + PUB = PctPubAsst_numE / PctPubAsst_denE, + U30 = ( + PctHHUnder30K_num1E + PctHHUnder30K_num2E + PctHHUnder30K_num3E + + PctHHUnder30K_num4E + PctHHUnder30K_num5E + ) / PctHHUnder30K_denE, + EDU = PctEducLessThanHS_numE / PctEducLessThanHS_denE, + EMP = PctUnemp_numE / PctUnemp_denE + ) ### Clean-up and format DCtracts2020messer <- DCtracts2020messer %>% - dplyr::select(GEOID, OCC, CWD, POV, FHH, PUB, U30, EDU, EMP) + select(GEOID, OCC, CWD, POV, FHH, PUB, U30, EDU, EMP) ## For NDI (Powell-Wiley) ### ACS-5 variables -powell_wiley_vars <- c(MedHHInc = "B19013_001", - PctRecvIDR_num = "B19054_002", PctRecvIDR_den = "B19054_001", - PctPubAsst_num = "B19058_002", PctPubAsst_den = "B19058_001", - MedHomeVal = "B25077_001", - PctMgmtBusScArti_num = "C24060_002", PctMgmtBusScArti_den = "C24060_001", - PctFemHeadKids_num1 = "B11005_007", PctFemHeadKids_num2 = "B11005_010", - PctFemHeadKids_den = "B11005_001", - PctOwnerOcc = "DP04_0046P", - PctNoPhone = "DP04_0075P", - PctNComPlmb = "DP04_0073P", - PctEduc_num25upHS = "S1501_C01_009", - PctEduc_num25upSC = "S1501_C01_010", - PctEduc_num25upAD = "S1501_C01_011", - PctEduc_num25upBD = "S1501_C01_012", - PctEduc_num25upGD = "S1501_C01_013", - PctEduc_den25up = "S1501_C01_006", - PctFamBelowPov = "S1702_C02_001", - PctUnempl = "S2301_C04_001", - TotalPopulation = "B01001_001") +powell_wiley_vars <- c( + MedHHInc = 'B19013_001', + PctRecvIDR_num = 'B19054_002', + PctRecvIDR_den = 'B19054_001', + PctPubAsst_num = 'B19058_002', + 
PctPubAsst_den = 'B19058_001', + MedHomeVal = 'B25077_001', + PctMgmtBusScArti_num = 'C24060_002', + PctMgmtBusScArti_den = 'C24060_001', + PctFemHeadKids_num1 = 'B11005_007', + PctFemHeadKids_num2 = 'B11005_010', + PctFemHeadKids_den = 'B11005_001', + PctOwnerOcc = 'DP04_0046P', + PctNoPhone = 'DP04_0075P', + PctNComPlmb = 'DP04_0073P', + PctEduc_num25upHS = 'S1501_C01_009', + PctEduc_num25upSC = 'S1501_C01_010', + PctEduc_num25upAD = 'S1501_C01_011', + PctEduc_num25upBD = 'S1501_C01_012', + PctEduc_num25upGD = 'S1501_C01_013', + PctEduc_den25up = 'S1501_C01_006', + PctFamBelowPov = 'S1702_C02_001', + PctUnempl = 'S2301_C04_001', + TotalPopulation = 'B01001_001' +) ### Obtain ACS-5 data for DC tracts in 2020 -DCtracts2020pw <- tidycensus::get_acs(geography = "tract", - year = 2020, - output = "wide", - variables = powell_wiley_vars, - state = "DC") +DCtracts2020pw <- get_acs( + geography = 'tract', + year = 2020, + output = 'wide', + variables = powell_wiley_vars, + state = 'DC' +) ### Format ACS-5 data for NDI (Powell-Wiley) of DC tracts in 2020 -DCtracts2020pw <- DCtracts2020pw[ , -2] # omit NAME feature (column) +DCtracts2020pw <- DCtracts2020pw[,-2] # omit NAME feature (column) DCtracts2020pw <- DCtracts2020pw %>% - dplyr::mutate(MedHHInc = MedHHIncE, - PctRecvIDR = PctRecvIDR_numE / PctRecvIDR_denE * 100, - PctPubAsst = PctPubAsst_numE / PctPubAsst_denE * 100, - MedHomeVal = MedHomeValE, - PctMgmtBusScArti = PctMgmtBusScArti_numE / PctMgmtBusScArti_denE * 100, - PctFemHeadKids = (PctFemHeadKids_num1E + PctFemHeadKids_num2E) / PctFemHeadKids_denE * 100, - PctOwnerOcc = PctOwnerOccE, - PctNoPhone = PctNoPhoneE, - PctNComPlmb = PctNComPlmbE, - PctEducHSPlus = (PctEduc_num25upHSE + PctEduc_num25upSCE + PctEduc_num25upADE + - PctEduc_num25upBDE + PctEduc_num25upGDE) / PctEduc_den25upE * 100, - PctEducBchPlus = (PctEduc_num25upBDE + PctEduc_num25upGDE) / PctEduc_den25upE * 100, - PctFamBelowPov = PctFamBelowPovE, - PctUnempl = PctUnemplE, - TotalPop = 
TotalPopulationE) %>% + mutate( + MedHHInc = MedHHIncE, + PctRecvIDR = PctRecvIDR_numE / PctRecvIDR_denE * 100, + PctPubAsst = PctPubAsst_numE / PctPubAsst_denE * 100, + MedHomeVal = MedHomeValE, + PctMgmtBusScArti = PctMgmtBusScArti_numE / PctMgmtBusScArti_denE * 100, + PctFemHeadKids = (PctFemHeadKids_num1E + PctFemHeadKids_num2E) / PctFemHeadKids_denE * 100, + PctOwnerOcc = PctOwnerOccE, + PctNoPhone = PctNoPhoneE, + PctNComPlmb = PctNComPlmbE, + PctEducHSPlus = ( + PctEduc_num25upHSE + PctEduc_num25upSCE + PctEduc_num25upADE + + PctEduc_num25upBDE + PctEduc_num25upGDE + ) / PctEduc_den25upE * 100, + PctEducBchPlus = (PctEduc_num25upBDE + PctEduc_num25upGDE) / PctEduc_den25upE * 100, + PctFamBelowPov = PctFamBelowPovE, + PctUnempl = PctUnemplE, + TotalPop = TotalPopulationE + ) %>% # Log transform median household income and median home value - # Reverse code percentages so that higher values represent more deprivation + # Reverse code percentages so that higher values represent more deprivation # Round percentages to 1 decimal place - dplyr::mutate(logMedHHInc = log(MedHHInc), - logMedHomeVal = log(MedHomeVal), - PctNoIDR = 100 - PctRecvIDR, - PctWorkClass = 100 - PctMgmtBusScArti, - PctNotOwnerOcc = 100 - PctOwnerOcc, - PctEducLTHS = 100 - PctEducHSPlus, - PctEducLTBch = 100 - PctEducBchPlus) %>% + mutate( + logMedHHInc = log(MedHHInc), + logMedHomeVal = log(MedHomeVal), + PctNoIDR = 100 - PctRecvIDR, + PctWorkClass = 100 - PctMgmtBusScArti, + PctNotOwnerOcc = 100 - PctOwnerOcc, + PctEducLTHS = 100 - PctEducHSPlus, + PctEducLTBch = 100 - PctEducBchPlus + ) %>% # Z-standardize the percentages - dplyr::mutate(PctNoIDRZ = scale(PctNoIDR), - PctPubAsstZ = scale(PctPubAsst), - PctWorkClassZ = scale(PctWorkClass), - PctFemHeadKidsZ = scale(PctFemHeadKids), - PctNotOwnerOccZ = scale(PctNotOwnerOcc), - PctNoPhoneZ = scale(PctNoPhone), - PctNComPlmbZ = scale(PctNComPlmb), - PctEducLTHSZ = scale(PctEducLTHS), - PctEducLTBchZ = scale(PctEducLTBch), - PctFamBelowPovZ = 
scale(PctFamBelowPov), - PctUnemplZ = scale(PctUnempl)) + mutate( + PctNoIDRZ = scale(PctNoIDR), + PctPubAsstZ = scale(PctPubAsst), + PctWorkClassZ = scale(PctWorkClass), + PctFemHeadKidsZ = scale(PctFemHeadKids), + PctNotOwnerOccZ = scale(PctNotOwnerOcc), + PctNoPhoneZ = scale(PctNoPhone), + PctNComPlmbZ = scale(PctNComPlmb), + PctEducLTHSZ = scale(PctEducLTHS), + PctEducLTBchZ = scale(PctEducLTBch), + PctFamBelowPovZ = scale(PctFamBelowPov), + PctUnemplZ = scale(PctUnempl) + ) ### Clean-up and format DCtracts2020pw <- DCtracts2020pw %>% - dplyr::select(GEOID, TotalPop, logMedHHInc, PctNoIDRZ, PctPubAsstZ, logMedHomeVal, PctWorkClassZ, - PctFemHeadKidsZ, PctNotOwnerOccZ, PctNoPhoneZ, PctNComPlmbZ, PctEducLTHSZ, - PctEducLTBchZ, PctFamBelowPovZ, PctUnemplZ) + select( + GEOID, + TotalPop, + logMedHHInc, + PctNoIDRZ, + PctPubAsstZ, + logMedHomeVal, + PctWorkClassZ, + PctFemHeadKidsZ, + PctNotOwnerOccZ, + PctNoPhoneZ, + PctNComPlmbZ, + PctEducLTHSZ, + PctEducLTBchZ, + PctFamBelowPovZ, + PctUnemplZ + ) -# Combine -DCtracts2020 <- dplyr::left_join(DCtracts2020messer, DCtracts2020pw, by = "GEOID") -DCtracts2020 <- DCtracts2020[ , c(1, 10, 2:9, 11:ncol(DCtracts2020))] # reorder so TotalPop is second feature (column) +# Combine +DCtracts2020 <- left_join(DCtracts2020messer, DCtracts2020pw, by = 'GEOID') +# reorder so TotalPop is second feature (column) +DCtracts2020 <- DCtracts2020[, c(1, 10, 2:9, 11:ncol(DCtracts2020))] -# Export -usethis::use_data(DCtracts2020, overwrite = TRUE) +# ---------------- # +# DATA EXPORTATION # +# ---------------- # + +use_data(DCtracts2020, overwrite = TRUE) + +# ----------------------------------------- END OF CODE ----------------------------------------- # diff --git a/dev/hex_ndi.R b/dev/hex_ndi.R index 192285a..58ee32d 100644 --- a/dev/hex_ndi.R +++ b/dev/hex_ndi.R @@ -1,72 +1,70 @@ -# ------------------------------------------------------------------------------ # -# Hexsticker for the GitHub Repository idblr/ndi -# 
------------------------------------------------------------------------------ # +# ----------------------------------------------------------------------------------------------- # +# Hexagon sticker for the GitHub Repository idblr/ndi +# ----------------------------------------------------------------------------------------------- # # # Created by: Ian Buller, Ph.D., M.A. (GitHub: @idblr) -# Created on: July 23, 2022 +# Created on: 2022-07-23 # # Recently modified by: @idblr -# Recently modified on: August 04, 2022 +# Recently modified on: 2024-07-06 # # Notes: -# A) Uses the "hexSticker" package +# A) Uses the 'hexSticker' package # B) Subplot from an example computation of tract-level NDI (Messer) for Washington, D.C. (2020) # C) Hexsticker for the GitHub Repository https://github.com/idblr/ndi -# ------------------------------------------------------------------------------ # +# ----------------------------------------------------------------------------------------------- # -############ +# -------- # # PACKAGES # -############ +# -------- # -loadedPackages <- c("hexSticker", "ndi") +loadedPackages <- c('ggplot2', 'hexSticker', 'ndi', 'tidycensus', 'tigris') suppressMessages(invisible(lapply(loadedPackages, library, character.only = TRUE))) -############ +# -------- # # SETTINGS # -############ +# -------- # ## Access Key for census data download ### Obtain one at http://api.census.gov/data/key_signup.html -tidycensus::census_api_key("...") # INSERT YOUR OWN KEY FROM U.S. CENSUS API +census_api_key('...') # INSERT YOUR OWN KEY FROM U.S. 
CENSUS API -###################### +# ------------------ # # SUBPLOT GENERATION # -###################### +# ------------------ # # NDI 2020 -messer2020DC <- ndi::messer(state = "DC", year = 2020, imp = TRUE) +messer2020DC <- messer(state = 'DC', year = 2020, imp = TRUE) # Tracts 2020 -tract2020DC <- tigris::tracts(state = "DC", year = 2020, cb = TRUE) +tract2020DC <- tracts(state = 'DC', year = 2020, cb = TRUE) # Join -DC2020messer <- merge(tract2020DC, messer2020DC$ndi, by = "GEOID") +DC2020messer <- merge(tract2020DC, messer2020DC$ndi, by = 'GEOID') # Plot of tract-level NDI (Messer) for Washington, D.C. (2020) -dcp <- ggplot2::ggplot() + - ggplot2::geom_sf(data = DC2020messer, - ggplot2::aes(fill = NDI), - color = NA, - show.legend = FALSE) + - ggplot2::theme_void() + - ggplot2::theme(axis.text = ggplot2::element_blank()) + - ggplot2::scale_fill_viridis_c() + - ggplot2::labs(fill = "", - caption = "")+ - ggplot2::ggtitle("", subtitle = "") +dcp <- ggplot() + + geom_sf(data = DC2020messer, aes(fill = NDI), color = NA, show.legend = FALSE) + + theme_void() + + theme(axis.text = element_blank()) + + scale_fill_viridis_c() + + labs(fill = '', caption = '')+ + ggtitle('', subtitle = '') -##################### -# CREATE HEXSTICKER # -##################### +# ---------------------- # +# CREATE HEXAGON STICKER # +# ---------------------- # -s <- hexSticker::sticker(subplot = dcp, - package = "ndi", - p_size = 75, p_x = 0.55, p_y = 0.75, p_color = "#FDE724", # title - s_x = 1.15, s_y = 1.05, s_width = 2.1, s_height = 2.1, # symbol - h_fill = "#695488", # inside - h_color = "#440C54", # outline - dpi = 1000, # resolution - filename = "man/figures/ndi.png", - white_around_sticker = F) +s <-sticker( + subplot = dcp, + package = 'ndi', + p_size = 75, p_x = 0.55, p_y = 0.75, p_color = '#FDE724', # title + s_x = 1.15, s_y = 1.05, s_width = 2.1, s_height = 2.1, # symbol + h_fill = '#695488', # inside + h_color = '#440C54', # outline + dpi = 1000, # resolution + filename = 
file.path('man', 'figures', 'ndi.png'), + white_around_sticker = FALSE +) -# -------------------------------- END OF CODE --------------------------------- # +# ----------------------------------------- END OF CODE ----------------------------------------- # diff --git a/inst/CITATION b/inst/CITATION index 898c2af..96b2b88 100755 --- a/inst/CITATION +++ b/inst/CITATION @@ -1,247 +1,500 @@ -citHeader("To cite ndi in publications, please use the following and include the version number and DOI:") - -citEntry(entry = "manual", - title = "ndi: Neighborhood Deprivation Indices", - author = personList(as.person("Ian D. Buller")), - publisher = "The Comprehensive R Archive Network", - year = "2022", - number = "0.1.4.9001", - doi = "10.5281/zenodo.6989030", - url = "https://cran.r-project.org/package=ndi", +bibentry(bibtype = 'manual', + title = 'ndi: Neighborhood Deprivation Indices', + author = as.person('Ian D. Buller'), + publisher = 'The Comprehensive R Archive Network', + year = '2024', + number = '0.1.6.9000.', + doi = '10.5281/zenodo.6989030', + url = 'https://cran.r-project.org/package=ndi', - textVersion = - paste("Ian D. Buller (2022).", - "ndi: Neighborhood Deprivation Indices.", - "The Comprehensive R Archive Network.", - "v0.1.4.9001.", - "DOI:10.5281/zenodo.6989030", - "Accessed by: https://cran.r-project.org/package=ndi") -) - -citEntry(entry = "Article", - title = "A spatial measure of neighborhood level racial isolation applied to low birthweight, preterm birth, and birthweight in North Carolina", - author = personList(as.person("Rebecca Anthopolos"), - as.person("Sherman A. James"), - as.person("Alan E. Gelfand"), - as.person("Marie Lynn Miranda")), - journal = "Spatial and Spatio-temporal Epidemiology", - year = "2011", - volume = "2", - number = "4", - pages = "235--246", - doi = "10.1016/j.sste.2011.06.002", - - textVersion = - paste("Rebecca Anthopolos, Sherman A. James, Alan E. 
Gelfand, Marie Lynn Miranda (2011).", - "A spatial measure of neighborhood level racial isolation applied to low birthweight, preterm birth, and birthweight in North Carolina.", - "Spatial and Spatio-temporal Epidemiology, 2(4), 235-246.", - "DOI:10.1016/j.sste.2011.06.002"), - - header = "If you computed RI (Anthopolos) values, please also cite:" -) - -citEntry(entry = "Article", - title = "On the measurement of inequality", - author = personList(as.person("Anthony B. Atkinson")), - journal = "Journal of economic theory", - year = "1970", - volume = "2", - number = "3", - pages = "244--263", - doi = "10.1016/0022-0531(70)90039-6", - - textVersion = - paste("Anthony B. Atkinson (1970).", - "On the measurement of inequality.", - "Journal of economic theory, 2(3), 244-263.", - "DOI:10.1016/0022-0531(70)90039-6"), - - header = "If you computed AI (Atkinson) values, please also cite:" -) - -citEntry(entry = "Article", - title = "Assessing Disparity Using Measures of Racial and Educational Isolation", - author = personList(as.person("Mercedes A. Bravo"), - as.person("Man Chong Leong"), - as.person("Alan E. Gelfand"), - as.person("Marie Lynn Miranda")), - journal = "International Journal of Environmental Research and Public Health", - year = "2021", - volume = "18", - number = "17", - pages = "9384", - doi = "10.3390/ijerph18179384", - - textVersion = - paste("Mercedes A. Bravo, Man Chong Leong, Alan E. Gelfand, Marie Lynn Miranda (2021).", - "Assessing Disparity Using Measures of Racial and Educational Isolation.", - "International Journal of Environmental Research and Public Health, 18(17), 9384.", - "DOI:10.3390/ijerph18179384"), - - header = "If you computed EI (Bravo) values, please also cite:" -) - -citEntry(entry = "Article", - title = "A Methodological Analysis of Segregation Indexes", - author = personList(as.person("Otis D. 
Duncan"), - as.person("Beverly Duncan")), - journal = "American Sociological Review", - year = "1955", - volume = "20", - number = "2", - pages = "210--217", - doi = "10.2307/2088328", - - textVersion = - paste("Otis D. Duncan, Beverly Duncan (1955).", - "A Methodological Analysis of Segregation Indexes.", - "American Sociological Review, 20(2), 210-217.", - "DOI:10.2307/2088328"), - - header = "If you computed DI (Duncan) values, please also cite:" -) - -citEntry(entry = "Article", - title = "Measurement of Inequality of Incomes", - author = personList(as.person("Corrado Gini")), - journal = "The Economic Journal", - year = "1921", - volume = "31", - number = "121", - pages = "124--126", - doi = "10.2307/2223319", - - textVersion = - paste("Corrado Gini (1921).", - "Measurement of Inequality of Incomes.", - "The Economic Journal, 31(121), 124-126.", - "DOI:10.2307/2223319"), - - header = "If you retrieved Gini Index values, please also cite:" -) - -citEntry(entry = "Article", - title = "Spatial social polarisation: using the Index of Concentration at the Extremes jointly for income and race/ethnicity to analyse risk of hypertension", - author = personList(as.person("Justin M. Feldman"), - as.person("Pamela D. Waterman"), - as.person("Brent A. Coull"), - as.person("Nancy Krieger")), - journal = "Journal of Epidemiology and Community Health", - year = "2015", - volume = "69", - issue = "12", - pages = "1199--207", - doi = "10.1136/jech-2015-205728", - - textVersion = - paste("Justin M. Feldman, Pamela D. Waterman, Brent A. 
Coull, Nancy Krieger (2015).", - "Spatial social polarisation: using the Index of Concentration at the Extremes jointly for income and race/ethnicity to analyse risk of hypertension.", - "Journal of Epidemiology and Community Health, 69(12), 1199-207.", - "DOI:10.1136/jech-2015-205728"), - - header = "If you computed ICE (Krieger) values, please also cite (1):" -) - -citEntry(entry = "Article", - title = "Public Health Monitoring of Privilege and Deprivation With the Index of Concentration at the Extremes", - author = personList(as.person("Nancy Krieger"), - as.person("Pamela D. Waterman"), - as.person("Jasmina Spasojevic"), - as.person("Wenhui Li"), - as.person("Wenhui Li"), - as.person("Gretchen Van Wye")), - journal = "American Journal of Public Health ", - year = "2016", - volume = "106", - issue = "2", - pages = "256--263", - doi = "10.2105/AJPH.2015.302955", - - textVersion = - paste("Beth A. Slotman, David G Stinchcomb, Tiffany M. Powell-Wiley, Danielle M. Ostendorf, Brian E. Saelens, Amy A. Gorin, Shannon N. Zenk, David Berrigan (2016).", - "Public Health Monitoring of Privilege and Deprivation With the Index of Concentration at the Extremes.", - "American Journal of Public Health, 106(2), 256-263.", - "DOI:10.2105/AJPH.2015.302955"), - - header = "And (2):" -) - -citEntry(entry = "Article", - title = "The development of a standardized neighborhood deprivation index", - author = personList(as.person("Lynne C. Messer"), - as.person("Barbara A. Laraia"), - as.person("Jay S. Kaufman"), - as.person("Janet Eyster"), - as.person("Claudia Holzman"), - as.person("Jennifer Culhane"), - as.person("Irma Elo"), - as.person("Jessica Burke"), + textVersion = + paste('Ian D. 
Buller (2024).', + 'ndi: Neighborhood Deprivation Indices.', + 'The Comprehensive R Archive Network.', + 'v0.1.6.9000.', + 'DOI:10.5281/zenodo.6989030', + 'Accessed by: https://cran.r-project.org/package=ndi'), + + header = 'To cite ndi in publications, please use the following and include the version number and DOI:' +) + +bibentry(bibtype = 'Article', + title = 'A spatial measure of neighborhood level racial isolation applied to low birthweight, preterm birth, and birthweight in North Carolina', + author = c(as.person('Rebecca Anthopolos'), + as.person('Sherman A. James'), + as.person('Alan E. Gelfand'), + as.person('Marie Lynn Miranda')), + journal = 'Spatial and Spatio-temporal Epidemiology', + year = '2011', + volume = '2', + number = '4', + pages = '235--246', + doi = '10.1016/j.sste.2011.06.002', + + textVersion = + paste('Rebecca Anthopolos, Sherman A. James, Alan E. Gelfand, Marie Lynn Miranda (2011).', + 'A spatial measure of neighborhood level racial isolation applied to low birthweight, preterm birth, and birthweight in North Carolina.', + 'Spatial and Spatio-temporal Epidemiology, 2(4), 235-246.', + 'DOI:10.1016/j.sste.2011.06.002'), + + header = 'If you computed RI (Anthopolos) values, please also cite:' +) + +bibentry(bibtype = 'Article', + title = 'On the measurement of inequality', + author = as.person('Anthony B. Atkinson'), + journal = 'Journal of economic theory', + year = '1970', + volume = '2', + number = '3', + pages = '244--263', + doi = '10.1016/0022-0531(70)90039-6', + + textVersion = + paste('Anthony B. 
Atkinson (1970).', + 'On the measurement of inequality.', + 'Journal of economic theory, 2(3), 244-263.', + 'DOI:10.1016/0022-0531(70)90039-6'), + + header = 'If you computed A (Atkinson) values, please also cite:' +) + +bibentry(bibtype = 'Book', + title = 'The Social Areas of Los Angeles: Analysis and Typology', + author = c(as.person('Eshref Shevky'), + as.person('Marilyn Williams')), + year = '1949', + edition = '1st edition', + city = 'Los Angeles', + publisher = 'John Randolph Haynes and Dora Haynes Foundation', + isbn = '978-0-837-15637-8', + + textVersion = + paste('Eshref Shevky, Marilyn Williams (1949).', + 'The Social Areas of Los Angeles: Analysis and Typology.', + '1st Ed.', + 'Los Angeles:John Randolph Haynes and Dora Haynes Foundation.', + 'ISBN-13:978-0-837-15637-8'), + + header = 'If you computed xPy* (Bell) values, please also cite (1):' +) + +bibentry(bibtype = 'Article', + title = 'A Probability Model for the Measurement of Ecological Segregation', + author = as.person('Wendell Bell'), + journal = 'Social Forces', + year = '1954', + volume = '32', + issue = '4', + pages = '357--364', + doi = '10.2307/2574118', + + textVersion = + paste('Wendell Bell (1954).', + 'A Probability Model for the Measurement of Ecological Segregation.', + 'Social Forces, 32(4), 357-364.', + 'DOI:10.2307/2574118'), + + header = 'And (2):' +) + +bibentry(bibtype = 'Article', + title = 'Measures Matter: The Local Exposure/Isolation (LEx/Is) Metrics and Relationships between Local-Level Segregation and Breast Cancer Survival', + author = c(as.person('Amin Bemanian'), + as.person('Kirsten M.M. Beyer')), + journal = 'Cancer Epidemiology, Biomarkers & Prevention', + year = '2017', + volume = '26', + issue = '4', + pages = '516--524', + doi = '10.1158/1055-9965.EPI-16-0926', + + textVersion = + paste('Amin Bemanian, Kirsten M.M. 
Beyer (2017).', + 'Measures Matter: The Local Exposure/Isolation (LEx/Is) Metrics and Relationships between Local-Level Segregation and Breast Cancer Survival.', + 'Cancer Epidemiology, Biomarkers & Prevention, 26(4), 516-524.', + 'DOI:10.1158/1055-9965.EPI-16-0926'), + + header = 'If you computed LEx/Is (Bemanian & Beyer) values, please also cite:' +) + +bibentry(bibtype = 'Article', + title = 'Assessing Disparity Using Measures of Racial and Educational Isolation', + author = c(as.person('Mercedes A. Bravo'), + as.person('Man Chong Leong'), + as.person('Alan E. Gelfand'), + as.person('Marie Lynn Miranda')), + journal = 'International Journal of Environmental Research and Public Health', + year = '2021', + volume = '18', + number = '17', + pages = '9384', + doi = '10.3390/ijerph18179384', + + textVersion = + paste('Mercedes A. Bravo, Man Chong Leong, Alan E. Gelfand, Marie Lynn Miranda (2021).', + 'Assessing Disparity Using Measures of Racial and Educational Isolation.', + 'International Journal of Environmental Research and Public Health, 18(17), 9384.', + 'DOI:10.3390/ijerph18179384'), + + header = 'If you computed EI (Bravo) values, please also cite:' +) + +bibentry(bibtype = 'Article', + title = 'A Methodological Analysis of Segregation Indexes', + author = c(as.person('Otis D. Duncan'), + as.person('Beverly Duncan')), + journal = 'American Sociological Review', + year = '1955', + volume = '20', + number = '2', + pages = '210--217', + doi = '10.2307/2088328', + + textVersion = + paste('Otis D. 
Duncan, Beverly Duncan (1955).', + 'A Methodological Analysis of Segregation Indexes.', + 'American Sociological Review, 20(2), 210-217.', + 'DOI:10.2307/2088328'), + + header = 'If you computed D (Duncan & Duncan) values, please also cite:' +) + +bibentry(bibtype = 'Article', + title = 'Measurement of Inequality of Incomes', + author = as.person('Corrado Gini'), + journal = 'The Economic Journal', + year = '1921', + volume = '31', + number = '121', + pages = '124--126', + doi = '10.2307/2223319', + + textVersion = + paste('Corrado Gini (1921).', + 'Measurement of Inequality of Incomes.', + 'The Economic Journal, 31(121), 124-126.', + 'DOI:10.2307/2223319'), + + header = 'If you retrieved G (Gini) values, please also cite:' +) + +bibentry(bibtype = 'Article', + title = 'Spatial social polarisation: using the Index of Concentration at the Extremes jointly for income and race/ethnicity to analyse risk of hypertension', + author = c(as.person('Justin M. Feldman'), + as.person('Pamela D. Waterman'), + as.person('Brent A. Coull'), + as.person('Nancy Krieger')), + journal = 'Journal of Epidemiology and Community Health', + year = '2015', + volume = '69', + issue = '12', + pages = '1199--207', + doi = '10.1136/jech-2015-205728', + + textVersion = + paste('Justin M. Feldman, Pamela D. Waterman, Brent A. Coull, Nancy Krieger (2015).', + 'Spatial social polarisation: using the Index of Concentration at the Extremes jointly for income and race/ethnicity to analyse risk of hypertension.', + 'Journal of Epidemiology and Community Health, 69(12), 1199-207.', + 'DOI:10.1136/jech-2015-205728'), + + header = 'If you computed ICE (Krieger) values, please also cite (1):' +) + +bibentry(bibtype = 'Article', + title = 'Public Health Monitoring of Privilege and Deprivation With the Index of Concentration at the Extremes', + author = c(as.person('Nancy Krieger'), + as.person('Pamela D. 
Waterman'), + as.person('Jasmina Spasojevic'), + as.person('Wenhui Li'), + as.person('Wenhui Li'), + as.person('Gretchen Van Wye')), + journal = 'American Journal of Public Health ', + year = '2016', + volume = '106', + issue = '2', + pages = '256--263', + doi = '10.2105/AJPH.2015.302955', + + textVersion = + paste('Beth A. Slotman, David G Stinchcomb, Tiffany M. Powell-Wiley, Danielle M. Ostendorf, Brian E. Saelens, Amy A. Gorin, Shannon N. Zenk, David Berrigan (2016).', + 'Public Health Monitoring of Privilege and Deprivation With the Index of Concentration at the Extremes.', + 'American Journal of Public Health, 106(2), 256-263.', + 'DOI:10.2105/AJPH.2015.302955'), + + header = 'And (2):' +) + +bibentry(bibtype = 'Article', + title = 'The development of a standardized neighborhood deprivation index', + author = c(as.person('Lynne C. Messer'), + as.person('Barbara A. Laraia'), + as.person('Jay S. Kaufman'), + as.person('Janet Eyster'), + as.person('Claudia Holzman'), + as.person('Jennifer Culhane'), + as.person('Irma Elo'), + as.person('Jessica Burke'), as.person("Patricia O'Campo")), - journal = "Journal of Urban Health", - year = "2006", - volume = "83", - number = "6", - pages = "1041--1062", - doi = "10.1007/s11524-006-9094-x", - - textVersion = + journal = 'Journal of Urban Health', + year = '2006', + volume = '83', + number = '6', + pages = '1041--1062', + doi = '10.1007/s11524-006-9094-x', + + textVersion = paste("Lynne C. Messer, Barbara A. Laraia, Jay S. 
Kaufman, Janet Eyster, Claudia Holzman, Jennifer Culhane, Irma Elo, Jessica Burke, Patricia O'Campo (2006).", - "The development of a standardized neighborhood deprivation index.", - "Journal of Urban Health, 83(6), 1041-1062.", - "DOI:10.1007/s11524-006-9094-x"), - - header = "If you computed NDI (Messer) values, please also cite:" -) - -citEntry(entry = "Article", - title = "Geospatial analysis of neighborhood deprivation index (NDI) for the United States by county", - author = personList(as.person("Marcus A. Andrews"), - as.person("Kosuke Tomura"), - as.person("Sophie E. Claudel"), - as.person("Samantha Xu"), - as.person("Joniqua N. Ceasar"), - as.person("Billy S. Collins"), - as.person("Steven Langerman"), - as.person("Valerie M. Mitchell"), - as.person("Yvonne Baumer"), - as.person("Tiffany M. Powell-Wiley")), - journal = "Journal of Maps", - year = "2020", - volume = "16", - issue = "1", - pages = "101--112", - doi = "10.1080/17445647.2020.1750066", - - textVersion = - paste("Marcus A. Andrews, Kosuke Tomura, Sophie E. Claudel, Samantha Xu, Joniqua N. Ceasar, Billy S. Collins, Steven Langerman, Valerie M. Mitchell, Yvonne Baumer, Tiffany M. Powell-Wiley (2022).", - "Geospatial analysis of neighborhood deprivation index (NDI) for the United States by county.", - "Journal of Maps, 16(1), 101-112.", - "DOI:10.1080/17445647.2020.1750066"), - - header = "If you computed NDI (Powell-Wiley) values, please also cite (1):" -) - -citEntry(entry = "Article", - title = "Environmental data and methods from the Accumulating Data to Optimally Predict Obesity Treatment (ADOPT) core measures environmental working group", - author = personList(as.person("Beth A. Slotman"), - as.person("David G Stinchcomb"), - as.person("Tiffany M. Powell-Wiley"), - as.person("Danielle M. Ostendorf"), - as.person("Brian E. Saelens"), - as.person("Amy A. Gorin"), - as.person("Shannon N. 
Zenk"), - as.person("David Berrigan")), - journal = "Data in Brief", - year = "2022", - volume = "41", - pages = "108002", - doi = "10.1016/j.dib.2022.108002", - - textVersion = - paste("Beth A. Slotman, David G Stinchcomb, Tiffany M. Powell-Wiley, Danielle M. Ostendorf, Brian E. Saelens, Amy A. Gorin, Shannon N. Zenk, David Berrigan (2022).", - "The development of a standardized neighborhood deprivation index.", - "Data in Brief, 41, 108002.", - "DOI:10.1016/j.dib.2022.108002"), - - header = "And (2):" + 'The development of a standardized neighborhood deprivation index.', + 'Journal of Urban Health, 83(6), 1041-1062.', + 'DOI:10.1007/s11524-006-9094-x'), + + header = 'If you computed NDI (Messer) values, please also cite:' +) + +bibentry(bibtype = 'Article', + title = 'Geospatial analysis of neighborhood deprivation index (NDI) for the United States by county', + author = c(as.person('Marcus A. Andrews'), + as.person('Kosuke Tomura'), + as.person('Sophie E. Claudel'), + as.person('Samantha Xu'), + as.person('Joniqua N. Ceasar'), + as.person('Billy S. Collins'), + as.person('Steven Langerman'), + as.person('Valerie M. Mitchell'), + as.person('Yvonne Baumer'), + as.person('Tiffany M. Powell-Wiley')), + journal = 'Journal of Maps', + year = '2020', + volume = '16', + issue = '1', + pages = '101--112', + doi = '10.1080/17445647.2020.1750066', + + textVersion = + paste('Marcus A. Andrews, Kosuke Tomura, Sophie E. Claudel, Samantha Xu, Joniqua N. Ceasar, Billy S. Collins, Steven Langerman, Valerie M. Mitchell, Yvonne Baumer, Tiffany M. 
Powell-Wiley (2022).', + 'Geospatial analysis of neighborhood deprivation index (NDI) for the United States by county.', + 'Journal of Maps, 16(1), 101-112.', + 'DOI:10.1080/17445647.2020.1750066'), + + header = 'If you computed NDI (Powell-Wiley) values, please also cite (1):' +) + +bibentry(bibtype = 'Article', + title = 'Environmental data and methods from the Accumulating Data to Optimally Predict Obesity Treatment (ADOPT) core measures environmental working group', + author = c(as.person('Beth A. Slotman'), + as.person('David G Stinchcomb'), + as.person('Tiffany M. Powell-Wiley'), + as.person('Danielle M. Ostendorf'), + as.person('Brian E. Saelens'), + as.person('Amy A. Gorin'), + as.person('Shannon N. Zenk'), + as.person('David Berrigan')), + journal = 'Data in Brief', + year = '2022', + volume = '41', + pages = '108002', + doi = '10.1016/j.dib.2022.108002', + + textVersion = + paste('Beth A. Slotman, David G Stinchcomb, Tiffany M. Powell-Wiley, Danielle M. Ostendorf, Brian E. Saelens, Amy A. Gorin, Shannon N. Zenk, David Berrigan (2022).', + 'Environmental data and methods from the Accumulating Data to Optimally Predict Obesity Treatment (ADOPT) core measures environmental working group.', + 'Data in Brief, 41, 108002.', + 'DOI:10.1016/j.dib.2022.108002'), + + header = 'And (2):' +) + +bibentry(bibtype = 'Article', + title = 'Social Structure and Anomie', + author = as.person('Robert K. Merton'), + journal = 'American Sociological Review', + year = '1938', + volume = '3', + number = '5', + pages = '672--682', + doi = '10.2307/2084686 ', + + textVersion = + paste('Robert K. Merton (1938).', + 'Social Structure and Anomie.', + 'American Sociological Review, 3(5), 672-682.', + 'DOI:10.2307/2084686 '), + + header = 'If you computed LQ (Sudano) values, please also cite (1):' +) + +bibentry(bibtype = 'Article', + title = 'Neighborhood racial residential segregation and changes in health or death among older adults', + author = c(as.person('Joseph J. 
Sudano'), + as.person('Adam Perzynski'), + as.person('David W. Wong'), + as.person('Natalie Colabianchi'), + as.person('David Litaker')), + journal = 'Health & Place', + year = '2013', + volume = '19', + pages = '80--88', + doi = '10.1016/j.healthplace.2012.09.015', + + textVersion = + paste('Joseph J. Sudano, Adam Perzynski, David W. Wong, Natalie Colabianchi, David Litaker (2013).', + 'Neighborhood racial residential segregation and changes in health or death among older adults.', + 'Health & Place, 19, 80-88.', + 'DOI:10.1016/j.healthplace.2012.09.015'), + + header = 'And (2):' +) + +bibentry(bibtype = 'Article', + title = 'A Probability Model for the Measurement of Ecological Segregation', + author = as.person('Wendell Bell'), + journal = 'Social Forces', + year = '1954', + volume = '32', + issue = '4', + pages = '357--364', + doi = '10.2307/2574118', + + textVersion = + paste('Wendell Bell (1954).', + 'A Probability Model for the Measurement of Ecological Segregation.', + 'Social Forces, 32(4), 357-364.', + 'DOI:10.2307/2574118'), + + header = 'If you computed V (White) values, please also cite (1):' +) + +bibentry(bibtype = 'Article', + title = 'Segregation and Diversity Measures in Population Distribution', + author = as.person('Michael J. White'), + journal = 'Population Index', + year = '1986', + volume = '52', + issue = '2', + pages = '198--221', + doi = '10.2307/3644339', + + textVersion = + paste('Michael J. White (1986).', + 'Segregation and Diversity Measures in Population Distribution.', + 'Population Index, 52(2), 198-221.', + 'DOI:10.2307/3644339'), + + header = 'And (2):' +) + +bibentry(bibtype = 'Article', + title = 'Interstate Redistribution of Population, 1850-1940', + author = as.person('Edgar M. Hoover'), + journal = 'Journal of Economic History', + year = '1941', + volume = '1', + pages = '199--205', + doi = '10.2307/2223319', + + textVersion = + paste('Edgar M. 
Hoover (1941).', + 'Interstate Redistribution of Population, 1850-1940.', + 'Journal of Economic History, 1, 199-205.', + 'DOI:10.2307/2223319'), + + header = 'If you computed DEL (Hoover) values, please also cite (1):' +) + +bibentry(bibtype = 'Book', + title = 'Statistical Geography: Problems in Analyzing Area Data', + author = c(as.person('Otis D. Duncan'), + as.person('Ray P. Cuzzort'), + as.person('Beverly Duncan')), + year = '1961', + publisher = 'Free Press', + lc = '60007089', + + textVersion = + paste('Otis D. Duncan, Ray P. Cuzzort, & Beverly Duncan (1961).', + 'Statistical Geography: Problems in Analyzing Area Data.', + 'Free Press', + 'LC:60007089'), + + header = 'And (2):' +) + +bibentry(bibtype = 'Article', + title = 'Segregation and Diversity Measures in Population Distribution', + author = as.person('Michael J. White'), + journal = 'Population Index', + year = '1986', + volume = '52', + issue = '2', + pages = '198--221', + doi = '10.2307/3644339', + + textVersion = + paste('Michael J. White (1986).', + 'Segregation and Diversity Measures in Population Distribution.', + 'Population Index, 52(2), 198-221.', + 'DOI:10.2307/3644339'), + + header = 'If you computed SP (White) values, please also cite (1):' +) + +bibentry(bibtype = 'Book', + title = 'Inequality and Heterogeneity: A Primitive Theory of Social Structure', + author = as.person('Peter M. Blau'), + year = '1977', + publisher = 'Free Press', + isbn = '978-0-029-03660-0', + + textVersion = + paste('Peter M. 
Blau (1977).', + 'Inequality and Heterogeneity: A Primitive Theory of Social Structure.', + 'Free Press', + 'ISBN-13:978-0-029-03660-0'), + + header = 'And (2):' +) + +bibentry(bibtype = 'InBook', + title = 'Ethnic Segregation in Cities', + author = as.person('Stanley Lieberson'), + chapter = 'An Asymmetrical Approach to Segregation', + year = '1981', + edition = '1st edition', + editor = c(as.person('Ceri Peach'), + as.person('Vaughan Robinson'), + as.person('Susan Smith')), + city = 'London', + publisher = 'Croom Helm', + isbn = '978-1-032-53884-6', + + textVersion = + paste('Stanley Lieberson (1981).', + '"An Asymmetrical Approach to Segregation." Pp. 61-82 in', + 'Ethnic Segregation in Cities,', + 'edited by Ceri Peach, Vaughan Robinson, and Susan Smith.', + '1st Ed.', + 'London:Croom Helm.', + 'ISBN-13:978-1-032-53884-6'), + + header = 'If you computed xPx* (Lieberson) values, please also cite (1):' +) + +bibentry(bibtype = 'Article', + title = 'A Probability Model for the Measurement of Ecological Segregation', + author = as.person('Wendell Bell'), + journal = 'Social Forces', + year = '1954', + volume = '32', + issue = '4', + pages = '357--364', + doi = '10.2307/2574118', + + textVersion = + paste('Wendell Bell (1954).', + 'A Probability Model for the Measurement of Ecological Segregation.', + 'Social Forces, 32(4), 357-364.', + 'DOI:10.2307/2574118'), + + header = 'And (2):' ) diff --git a/man/anthopolos.Rd b/man/anthopolos.Rd index e3edb0f..8d4039b 100644 --- a/man/anthopolos.Rd +++ b/man/anthopolos.Rd @@ -7,7 +7,7 @@ anthopolos(geo = "tract", year = 2020, subgroup, quiet = FALSE, ...) } \arguments{ -\item{geo}{Character string specifying the geography of the data either census tracts \code{geo = "tract"} (the default) or counties \code{geo = "county"}.} +\item{geo}{Character string specifying the geography of the data either census tracts \code{geo = 'tract'} (the default) or counties \code{geo = 'county'}.} \item{year}{Numeric. 
The year to compute the estimate. The default is 2020, and the years 2009 onward are currently available.} @@ -21,54 +21,57 @@ anthopolos(geo = "tract", year = 2020, subgroup, quiet = FALSE, ...) An object of class 'list'. This is a named list with the following components: \describe{ -\item{\code{ri}}{An object of class 'tbl' for the GEOID, name, RI, and raw census values of specified census geographies.} -\item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute RI.} +\item{\code{ri}}{An object of class 'tbl' for the GEOID, name, \emph{RI}, and raw census values of specified census geographies.} +\item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute \emph{RI}.} } } \description{ Compute the spatial Racial Isolation Index (Anthopolos) of selected subgroup(s). } \details{ -This function will compute the spatial Racial Isolation Index (RI) of U.S. census tracts or counties for a specified geographical extent (e.g., the entire U.S. or a single state) based on Anthopolos et al. (2011) \doi{10.1016/j.sste.2011.06.002} who originally designed the metric for the racial isolation of non-Hispanic Black individuals. This function provides the computation of RI for any of the U.S. Census Bureau race/ethnicity subgroups (including Hispanic and non-Hispanic individuals). +This function will compute the spatial Racial Isolation Index (\emph{RI}) of U.S. census tracts or counties for a specified geographical extent (e.g., the entire U.S. or a single state) based on Anthopolos et al. (2011) \doi{10.1016/j.sste.2011.06.002} who originally designed the metric for the racial isolation of non-Hispanic Black individuals. This function provides the computation of \emph{RI} for any of the U.S. Census Bureau race/ethnicity subgroups (including Hispanic and non-Hispanic individuals). 
The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the geospatial computation. The yearly estimates are available for 2009 onward when ACS-5 data are available but are available from other U.S. Census Bureau surveys. The twenty racial/ethnic subgroups (U.S. Census Bureau definitions) are: \itemize{ -\item{B03002_002: }{not Hispanic or Latino "NHoL"} -\item{B03002_003: }{not Hispanic or Latino, white alone "NHoLW"} -\item{B03002_004: }{not Hispanic or Latino, Black or African American alone "NHoLB"} -\item{B03002_005: }{not Hispanic or Latino, American Indian and Alaska Native alone "NHoLAIAN"} -\item{B03002_006: }{not Hispanic or Latino, Asian alone "NHoLA"} -\item{B03002_007: }{not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone "NHoLNHOPI"} -\item{B03002_008: }{not Hispanic or Latino, Some other race alone "NHoLSOR"} -\item{B03002_009: }{not Hispanic or Latino, Two or more races "NHoLTOMR"} -\item{B03002_010: }{not Hispanic or Latino, Two races including Some other race "NHoLTRiSOR"} -\item{B03002_011: }{not Hispanic or Latino, Two races excluding Some other race, and three or more races "NHoLTReSOR"} -\item{B03002_012: }{Hispanic or Latino "HoL"} -\item{B03002_013: }{Hispanic or Latino, white alone "HoLW"} -\item{B03002_014: }{Hispanic or Latino, Black or African American alone "HoLB"} -\item{B03002_015: }{Hispanic or Latino, American Indian and Alaska Native alone "HoLAIAN"} -\item{B03002_016: }{Hispanic or Latino, Asian alone "HoLA"} -\item{B03002_017: }{Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone "HoLNHOPI"} -\item{B03002_018: }{Hispanic or Latino, Some other race alone "HoLSOR"} -\item{B03002_019: }{Hispanic or Latino, Two or more races "HoLTOMR"} -\item{B03002_020: }{Hispanic or Latino, Two races including Some other race "HoLTRiSOR"} -\item{B03002_021: }{Hispanic or Latino, Two races excluding Some other race, 
and three or more races "HoLTReSOR"} +\item \strong{B03002_002}: not Hispanic or Latino \code{'NHoL'} +\item \strong{B03002_003}: not Hispanic or Latino, white alone \code{'NHoLW'} +\item \strong{B03002_004}: not Hispanic or Latino, Black or African American alone \code{'NHoLB'} +\item \strong{B03002_005}: not Hispanic or Latino, American Indian and Alaska Native alone \code{'NHoLAIAN'} +\item \strong{B03002_006}: not Hispanic or Latino, Asian alone \code{'NHoLA'} +\item \strong{B03002_007}: not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'NHoLNHOPI'} +\item \strong{B03002_008}: not Hispanic or Latino, Some other race alone \code{'NHoLSOR'} +\item \strong{B03002_009}: not Hispanic or Latino, Two or more races \code{'NHoLTOMR'} +\item \strong{B03002_010}: not Hispanic or Latino, Two races including Some other race \code{'NHoLTRiSOR'} +\item \strong{B03002_011}: not Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'NHoLTReSOR'} +\item \strong{B03002_012}: Hispanic or Latino \code{'HoL'} +\item \strong{B03002_013}: Hispanic or Latino, white alone \code{'HoLW'} +\item \strong{B03002_014}: Hispanic or Latino, Black or African American alone \code{'HoLB'} +\item \strong{B03002_015}: Hispanic or Latino, American Indian and Alaska Native alone \code{'HoLAIAN'} +\item \strong{B03002_016}: Hispanic or Latino, Asian alone \code{'HoLA'} +\item \strong{B03002_017}: Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'HoLNHOPI'} +\item \strong{B03002_018}: Hispanic or Latino, Some other race alone \code{'HoLSOR'} +\item \strong{B03002_019}: Hispanic or Latino, Two or more races \code{'HoLTOMR'} +\item \strong{B03002_020}: Hispanic or Latino, Two races including Some other race \code{'HoLTRiSOR'} +\item \strong{B03002_021}: Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'HoLTReSOR'} } -Use the internal \code{state} and \code{county} arguments within
the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. NOTE: Current version does not correct for edge effects (e.g., census geographies along the specified spatial extent border, coastline, or U.S.-Mexico / U.S.-Canada border) may have few neighboring census geographies, and RI values in these census geographies may be unstable. A stop-gap solution for the former source of edge effect is to compute the RI for neighboring census geographies (i.e., the states bordering a study area of interest) and then use the estimates of the study area of interest. +Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. NOTE: Current version does not correct for edge effects (e.g., census geographies along the specified spatial extent border, coastline, or U.S.-Mexico / U.S.-Canada border) may have few neighboring census geographies, and \emph{RI} values in these census geographies may be unstable. A stop-gap solution for the former source of edge effect is to compute the \emph{RI} for neighboring census geographies (i.e., the states bordering a study area of interest) and then use the estimates of the study area of interest. -A census geography (and its neighbors) that has nearly all of its population who identify with the specified race/ethnicity subgroup(s) (e.g., non-Hispanic or Latino, Black or African American alone) will have an RI value close to 1. In contrast, a census geography (and its neighbors) that has nearly none of its population who identify with the specified race/ethnicity subgroup(s) (e.g., not non-Hispanic or Latino, Black or African American alone) will have an RI value close to 0. 
+A census geography (and its neighbors) that has nearly all of its population who identify with the specified race/ethnicity subgroup(s) (e.g., non-Hispanic or Latino, Black or African American alone) will have an \emph{RI} value close to 1. In contrast, a census geography (and its neighbors) that has nearly none of its population who identify with the specified race/ethnicity subgroup(s) (e.g., not non-Hispanic or Latino, Black or African American alone) will have an \emph{RI} value close to 0. } \examples{ \dontrun{ # Wrapped in \dontrun{} because these examples require a Census API key. - - # Tract-level metric (2020) - anthopolos(geo = "tract", state = "GA", year = 2020, subgroup = c("NHoLB", "HoLB")) - - # County-level metric (2020) - anthopolos(geo = "county", state = "GA", year = 2020, subgroup = c("NHoLB", "HoLB")) - + + # Racial Isolation Index of Black populations + ## of census tracts within Georgia, U.S.A., counties (2020) + anthopolos( + geo = 'tract', + state = 'GA', + year = 2020, + subgroup = c('NHoLB', 'HoLB') + ) + } } diff --git a/man/atkinson.Rd b/man/atkinson.Rd index a3220dd..dfc5941 100644 --- a/man/atkinson.Rd +++ b/man/atkinson.Rd @@ -16,9 +16,9 @@ atkinson( ) } \arguments{ -\item{geo_large}{Character string specifying the larger geographical unit of the data. The default is counties \code{geo_large = "county"}.} +\item{geo_large}{Character string specifying the larger geographical unit of the data. The default is counties \code{geo_large = 'county'}.} -\item{geo_small}{Character string specifying the smaller geographical unit of the data. The default is census tracts \code{geo_large = "tract"}.} +\item{geo_small}{Character string specifying the smaller geographical unit of the data. The default is census tracts \code{geo_small = 'tract'}.} \item{year}{Numeric. The year to compute the estimate. The default is 2020, and the years 2009 onward are currently available.} @@ -36,58 +36,63 @@ atkinson( An object of class 'list'.
This is a named list with the following components: \describe{ -\item{\code{ai}}{An object of class 'tbl' for the GEOID, name, and AI at specified larger census geographies.} -\item{\code{ai_data}}{An object of class 'tbl' for the raw census values at specified smaller census geographies.} -\item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute AI} +\item{\code{a}}{An object of class 'tbl' for the GEOID, name, and \emph{A} at specified larger census geographies.} +\item{\code{a_data}}{An object of class 'tbl' for the raw census values at specified smaller census geographies.} +\item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute \emph{A}.} } } \description{ Compute the aspatial Atkinson Index of income or selected racial/ethnic subgroup(s) and U.S. geographies. } \details{ -This function will compute the aspatial Atkinson Index (AI) of income or selected racial/ethnic subgroups and U.S. geographies for a specified geographical extent (e.g., the entire U.S. or a single state) based on Atkinson (1970) \doi{10.1016/0022-0531(70)90039-6}. This function provides the computation of AI for median household income and any of the U.S. Census Bureau race/ethnicity subgroups (including Hispanic and non-Hispanic individuals). +This function will compute the aspatial Atkinson Index (\emph{A}) of income or selected racial/ethnic subgroups and U.S. geographies for a specified geographical extent (e.g., the entire U.S. or a single state) based on Atkinson (1970) \doi{10.1016/0022-0531(70)90039-6}. This function provides the computation of \emph{A} for median household income and any of the U.S. Census Bureau race/ethnicity subgroups (including Hispanic and non-Hispanic individuals). -The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. 
Census Bureau 5-year American Community Survey characteristics used for the aspatial computation. The yearly estimates are available for 2009 onward when ACS-5 data are available but are available from other U.S. Census Bureau surveys. When \code{subgroup = "MedHHInc"}, the metric will be computed for median household income ("B19013_001"). The twenty racial/ethnic subgroups (U.S. Census Bureau definitions) are: +The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the aspatial computation. The yearly estimates are available for 2009 onward when ACS-5 data are available (2010 onward for \code{geo_large = 'cbsa'} and 2011 onward for \code{geo_large = 'csa'} or \code{geo_large = 'metro'}) but may be available from other U.S. Census Bureau surveys. When \code{subgroup = 'MedHHInc'}, the metric will be computed for median household income ('B19013_001'). The twenty racial/ethnic subgroups (U.S. 
Census Bureau definitions) are: \itemize{ -\item{B03002_002: }{not Hispanic or Latino "NHoL"} -\item{B03002_003: }{not Hispanic or Latino, white alone "NHoLW"} -\item{B03002_004: }{not Hispanic or Latino, Black or African American alone "NHoLB"} -\item{B03002_005: }{not Hispanic or Latino, American Indian and Alaska Native alone "NHoLAIAN"} -\item{B03002_006: }{not Hispanic or Latino, Asian alone "NHoLA"} -\item{B03002_007: }{not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone "NHoLNHOPI"} -\item{B03002_008: }{not Hispanic or Latino, Some other race alone "NHoLSOR"} -\item{B03002_009: }{not Hispanic or Latino, Two or more races "NHoLTOMR"} -\item{B03002_010: }{not Hispanic or Latino, Two races including Some other race "NHoLTRiSOR"} -\item{B03002_011: }{not Hispanic or Latino, Two races excluding Some other race, and three or more races "NHoLTReSOR"} -\item{B03002_012: }{Hispanic or Latino "HoL"} -\item{B03002_013: }{Hispanic or Latino, white alone "HoLW"} -\item{B03002_014: }{Hispanic or Latino, Black or African American alone "HoLB"} -\item{B03002_015: }{Hispanic or Latino, American Indian and Alaska Native alone "HoLAIAN"} -\item{B03002_016: }{Hispanic or Latino, Asian alone "HoLA"} -\item{B03002_017: }{Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone "HoLNHOPI"} -\item{B03002_018: }{Hispanic or Latino, Some other race alone "HoLSOR"} -\item{B03002_019: }{Hispanic or Latino, Two or more races "HoLTOMR"} -\item{B03002_020: }{Hispanic or Latino, Two races including Some other race "HoLTRiSOR"} -\item{B03002_021: }{Hispanic or Latino, Two races excluding Some other race, and three or more races "HoLTReSOR"} +\item \strong{B03002_002}: not Hispanic or Latino \code{'NHoL'} +\item \strong{B03002_003}: not Hispanic or Latino, white alone \code{'NHoLW'} +\item \strong{B03002_004}: not Hispanic or Latino, Black or African American alone \code{'NHoLB'} +\item \strong{B03002_005}: not Hispanic or Latino, American Indian and Alaska 
Native alone \code{'NHoLAIAN'} +\item \strong{B03002_006}: not Hispanic or Latino, Asian alone \code{'NHoLA'} +\item \strong{B03002_007}: not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'NHoLNHOPI'} +\item \strong{B03002_008}: not Hispanic or Latino, Some other race alone \code{'NHoLSOR'} +\item \strong{B03002_009}: not Hispanic or Latino, Two or more races \code{'NHoLTOMR'} +\item \strong{B03002_010}: not Hispanic or Latino, Two races including Some other race \code{'NHoLTRiSOR'} +\item \strong{B03002_011}: not Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'NHoLTReSOR'} +\item \strong{B03002_012}: Hispanic or Latino \code{'HoL'} +\item \strong{B03002_013}: Hispanic or Latino, white alone \code{'HoLW'} +\item \strong{B03002_014}: Hispanic or Latino, Black or African American alone \code{'HoLB'} +\item \strong{B03002_015}: Hispanic or Latino, American Indian and Alaska Native alone \code{'HoLAIAN'} +\item \strong{B03002_016}: Hispanic or Latino, Asian alone \code{'HoLA'} +\item \strong{B03002_017}: Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'HoLNHOPI'} +\item \strong{B03002_018}: Hispanic or Latino, Some other race alone \code{'HoLSOR'} +\item \strong{B03002_019}: Hispanic or Latino, Two or more races \code{'HoLTOMR'} +\item \strong{B03002_020}: Hispanic or Latino, Two races including Some other race \code{'HoLTRiSOR'} +\item \strong{B03002_021}: Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'HoLTReSOR'} } Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. -AI is a measure of the evenness of residential inequality (e.g., racial/ethnic segregation) when comparing smaller geographical areas to larger ones within which the smaller geographical areas are located. 
The AI metric can range in value from 0 to 1 with smaller values indicating lower levels of inequality (e.g., less segregation). +\emph{A} is a measure of the evenness of residential inequality (e.g., racial/ethnic segregation) when comparing smaller geographical areas to larger ones within which the smaller geographical areas are located. \emph{A} can range in value from 0 to 1 with smaller values indicating lower levels of inequality (e.g., less segregation). -The \code{epsilon} argument that determines how to weight the increments to inequality contributed by different proportions of the Lorenz curve. A user must explicitly decide how heavily to weight smaller geographical units at different points on the Lorenz curve (i.e., whether the index should take greater account of differences among areas of over- or under-representation). The \code{epsilon} argument must have values between 0 and 1.0. For \code{0 <= epsilon < 0.5} or less "inequality-averse," smaller geographical units with a subgroup proportion smaller than the subgroup proportion of the larger geographical unit contribute more to inequality ("over-representation"). For \code{0.5 < epsilon <= 1.0} or more "inequality-averse," smaller geographical units with a subgroup proportion larger than the subgroup proportion of the larger geographical unit contribute more to inequality ("under-representation"). If \code{epsilon = 0.5} (the default), units of over- and under-representation contribute equally to the index. See Section 2.3 of Saint-Jacques et al. (2020) \doi{10.48550/arXiv.2002.05819} for one method to select \code{epsilon}. +The \code{epsilon} argument that determines how to weight the increments to inequality contributed by different proportions of the Lorenz curve. A user must explicitly decide how heavily to weight smaller geographical units at different points on the Lorenz curve (i.e., whether the index should take greater account of differences among areas of over- or under-representation). 
The \code{epsilon} argument must have values between 0 and 1.0. For \code{0 <= epsilon < 0.5} or less 'inequality-averse,' smaller geographical units with a subgroup proportion smaller than the subgroup proportion of the larger geographical unit contribute more to inequality ('over-representation'). For \code{0.5 < epsilon <= 1.0} or more 'inequality-averse,' smaller geographical units with a subgroup proportion larger than the subgroup proportion of the larger geographical unit contribute more to inequality ('under-representation'). If \code{epsilon = 0.5} (the default), units of over- and under-representation contribute equally to the index. See Section 2.3 of Saint-Jacques et al. (2020) \doi{10.48550/arXiv.2002.05819} for one method to select \code{epsilon}. -Larger geographies available include state \code{geo_large = "state"}, county \code{geo_large = "county"}, and census tract \code{geo_large = "tract"} levels. Smaller geographies available include, county \code{geo_small = "county"}, census tract \code{geo_small = "tract"}, and census block group \code{geo_small = "block group"} levels. If a larger geographical area is comprised of only one smaller geographical area (e.g., a U.S county contains only one census tract), then the AI value returned is NA. +Larger geographies available include state \code{geo_large = 'state'}, county \code{geo_large = 'county'}, census tract \code{geo_large = 'tract'}, Core Based Statistical Area \code{geo_large = 'cbsa'}, Combined Statistical Area \code{geo_large = 'csa'}, and Metropolitan Division \code{geo_large = 'metro'} levels. Smaller geographies available include, county \code{geo_small = 'county'}, census tract \code{geo_small = 'tract'}, and census block group \code{geo_small = 'block group'} levels. If a larger geographical area is comprised of only one smaller geographical area (e.g., a U.S county contains only one census tract), then the \emph{A} value returned is NA. 
If the larger geographical unit is Combined Statistical Areas \code{geo_large = 'csa'} or Core Based Statistical Areas \code{geo_large = 'cbsa'}, only the smaller geographical units completely within a larger geographical unit are considered in the \emph{A} computation (see internal \code{\link[sf]{st_within}} function for more information), and we recommend specifying all states within which the larger geographical unit of interest is located using the internal \code{state} argument to ensure all appropriate smaller geographical units are included in the \emph{A} computation. } \examples{ \dontrun{ # Wrapped in \dontrun{} because these examples require a Census API key. - - # Atkinson Index of non-Hispanic Black vs. non-Hispanic white populations - ## of census tracts within Georgia, U.S.A., counties (2020) - atkinson(geo_large = "county", geo_small = "tract", state = "GA", year = 2020, - subgroup = "NHoLB") - + + # Atkinson Index (a measure of the evenness) of Black populations + ## of census tracts within counties within Georgia, U.S.A., counties (2020) + atkinson( + geo_large = 'county', + geo_small = 'tract', + state = 'GA', + year = 2020, + subgroup = c('NHoLB', 'HoLB') + ) + } } diff --git a/man/bell.Rd b/man/bell.Rd new file mode 100644 index 0000000..02fc8ba --- /dev/null +++ b/man/bell.Rd @@ -0,0 +1,100 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bell.R +\name{bell} +\alias{bell} +\title{Interaction Index based on Shevky & Williams (1949) and Bell (1954)} +\usage{ +bell( + geo_large = "county", + geo_small = "tract", + year = 2020, + subgroup, + subgroup_ixn, + omit_NAs = TRUE, + quiet = FALSE, + ... +) +} +\arguments{ +\item{geo_large}{Character string specifying the larger geographical unit of the data. The default is counties \code{geo_large = 'county'}.} + +\item{geo_small}{Character string specifying the smaller geographical unit of the data.
The default is census tracts \code{geo_small = 'tract'}.} + +\item{year}{Numeric. The year to compute the estimate. The default is 2020, and the years 2009 onward are currently available.} + +\item{subgroup}{Character string specifying the racial/ethnic subgroup(s). See Details for available choices.} + +\item{subgroup_ixn}{Character string specifying the racial/ethnic subgroup(s) as the interaction population. If the same as \code{subgroup}, will compute the simple isolation of the group. See Details for available choices.} + +\item{omit_NAs}{Logical. If FALSE, will compute index for a larger geographical unit only if all of its smaller geographical units have values. The default is TRUE.} + +\item{quiet}{Logical. If TRUE, will display messages about potential missing census information. The default is FALSE.} + +\item{...}{Arguments passed to \code{\link[tidycensus]{get_acs}} to select state, county, and other arguments for census characteristics} +} +\value{ +An object of class 'list'. This is a named list with the following components: + +\describe{ +\item{\code{xpy_star}}{An object of class 'tbl' for the GEOID, name, and \emph{xPy\*} at specified larger census geographies.} +\item{\code{xpy_star_data}}{An object of class 'tbl' for the raw census values at specified smaller census geographies.} +\item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute \emph{xPy\*}.} +} +} +\description{ +Compute the aspatial Interaction Index (Bell) of a selected racial/ethnic subgroup(s) and U.S. geographies. +} +\details{ +This function will compute the aspatial Interaction Index (\emph{xPy\*}) of selected racial/ethnic subgroups and U.S. geographies for a specified geographical extent (e.g., the entire U.S. or a single state) based on Shevky & Williams (1949; ISBN-13:978-0-837-15637-8) and Bell (1954) \doi{10.2307/2574118}. This function provides the computation of \emph{xPy\*} for any of the U.S.
Census Bureau race/ethnicity subgroups (including Hispanic and non-Hispanic individuals). + +The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the aspatial computation. The yearly estimates are available for 2009 onward when ACS-5 data are available (2010 onward for \code{geo_large = 'cbsa'} and 2011 onward for \code{geo_large = 'csa'} or \code{geo_large = 'metro'}) but may be available from other U.S. Census Bureau surveys. The twenty racial/ethnic subgroups (U.S. Census Bureau definitions) are: +\itemize{ +\item \strong{B03002_002}: not Hispanic or Latino \code{'NHoL'} +\item \strong{B03002_003}: not Hispanic or Latino, white alone \code{'NHoLW'} +\item \strong{B03002_004}: not Hispanic or Latino, Black or African American alone \code{'NHoLB'} +\item \strong{B03002_005}: not Hispanic or Latino, American Indian and Alaska Native alone \code{'NHoLAIAN'} +\item \strong{B03002_006}: not Hispanic or Latino, Asian alone \code{'NHoLA'} +\item \strong{B03002_007}: not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'NHoLNHOPI'} +\item \strong{B03002_008}: not Hispanic or Latino, Some other race alone \code{'NHoLSOR'} +\item \strong{B03002_009}: not Hispanic or Latino, Two or more races \code{'NHoLTOMR'} +\item \strong{B03002_010}: not Hispanic or Latino, Two races including Some other race \code{'NHoLTRiSOR'} +\item \strong{B03002_011}: not Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'NHoLTReSOR'} +\item \strong{B03002_012}: Hispanic or Latino \code{'HoL'} +\item \strong{B03002_013}: Hispanic or Latino, white alone \code{'HoLW'} +\item \strong{B03002_014}: Hispanic or Latino, Black or African American alone \code{'HoLB'} +\item \strong{B03002_015}: Hispanic or Latino, American Indian and Alaska Native alone \code{'HoLAIAN'} +\item \strong{B03002_016}: Hispanic or Latino, Asian alone \code{'HoLA'} 
+\item \strong{B03002_017}: Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'HoLNHOPI'} +\item \strong{B03002_018}: Hispanic or Latino, Some other race alone \code{'HoLSOR'} +\item \strong{B03002_019}: Hispanic or Latino, Two or more races \code{'HoLTOMR'} +\item \strong{B03002_020}: Hispanic or Latino, Two races including Some other race \code{'HoLTRiSOR'} +\item \strong{B03002_021}: Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'HoLTReSOR'} +} + +Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. + +\emph{xPy\*} is some measure of the probability that a member of one subgroup(s) will meet or interact with a member of another subgroup(s) with higher values signifying higher probability of interaction (less isolation). \emph{xPy\*} can range in value from 0 to 1. + +Larger geographies available include state \code{geo_large = 'state'}, county \code{geo_large = 'county'}, census tract \code{geo_large = 'tract'}, Core Based Statistical Area \code{geo_large = 'cbsa'}, Combined Statistical Area \code{geo_large = 'csa'}, and Metropolitan Division \code{geo_large = 'metro'} levels. Smaller geographies available include, county \code{geo_small = 'county'}, census tract \code{geo_small = 'tract'}, and census block group \code{geo_small = 'block group'} levels. If a larger geographical area is comprised of only one smaller geographical area (e.g., a U.S county contains only one census tract), then the \emph{xPy\*} value returned is NA. 
If the larger geographical unit is Combined Statistical Areas \code{geo_large = 'csa'} or Core Based Statistical Areas \code{geo_large = 'cbsa'}, only the smaller geographical units completely within a larger geographical unit are considered in the \emph{xPy\*} computation (see internal \code{\link[sf]{st_within}} function for more information), and we recommend specifying all states within which the larger geographical unit of interest is located using the internal \code{state} argument to ensure all appropriate smaller geographical units are included in the \emph{xPy\*} computation. +} +\examples{ +\dontrun{ +# Wrapped in \dontrun{} because these examples require a Census API key. + + # Interaction of non-Hispanic Black vs. non-Hispanic white populations + ## of census tracts within counties of Georgia, U.S.A. (2020) + bell( + geo_large = 'county', + geo_small = 'tract', + state = 'GA', + year = 2020, + subgroup = 'NHoLB', + subgroup_ixn = 'NHoLW' + ) + +} + +} +\seealso{ +\code{\link[tidycensus]{get_acs}} for additional arguments for geographic extent selection (i.e., \code{state} and \code{county}). +} diff --git a/man/bemanian_beyer.Rd b/man/bemanian_beyer.Rd new file mode 100644 index 0000000..1fabd38 --- /dev/null +++ b/man/bemanian_beyer.Rd @@ -0,0 +1,102 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bemanian_beyer.R +\name{bemanian_beyer} +\alias{bemanian_beyer} +\title{Local Exposure and Isolation metric based on Bemanian & Beyer (2017)} +\usage{ +bemanian_beyer( + geo_large = "county", + geo_small = "tract", + year = 2020, + subgroup, + subgroup_ixn, + omit_NAs = TRUE, + quiet = FALSE, + ... +) +} +\arguments{ +\item{geo_large}{Character string specifying the larger geographical unit of the data. The default is counties \code{geo_large = 'county'}.} + +\item{geo_small}{Character string specifying the smaller geographical unit of the data. 
The default is census tracts \code{geo_small = 'tract'}.} + +\item{year}{Numeric. The year to compute the estimate. The default is 2020, and the years 2009 onward are currently available.} + +\item{subgroup}{Character string specifying the racial/ethnic subgroup(s). See Details for available choices.} + +\item{subgroup_ixn}{Character string specifying the racial/ethnic subgroup(s) as the interaction population. If the same as \code{subgroup}, will compute the simple isolation of the group. See Details for available choices.} + +\item{omit_NAs}{Logical. If FALSE, will compute index for a larger geographical unit only if all of its smaller geographical units have values. The default is TRUE.} + +\item{quiet}{Logical. If TRUE, will display messages about potential missing census information. The default is FALSE.} + +\item{...}{Arguments passed to \code{\link[tidycensus]{get_acs}} to select state, county, and other arguments for census characteristics} +} +\value{ +An object of class 'list'. This is a named list with the following components: + +\describe{ +\item{\code{lexis}}{An object of class 'tbl' for the GEOID, name, and \emph{LEx/Is} at specified smaller census geographies.} +\item{\code{lexis_data}}{An object of class 'tbl' for the raw census values at specified smaller census geographies.} +\item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute \emph{LEx/Is}.} +} +} +\description{ +Compute the aspatial Local Exposure and Isolation (Bemanian & Beyer) metric of selected racial/ethnic subgroup(s) and U.S. geographies. +} +\details{ +This function will compute the aspatial Local Exposure and Isolation (\emph{LEx/Is}) metric of selected racial/ethnic subgroups and U.S. geographies for a specified geographical extent (e.g., the entire U.S. or a single state) based on Bemanian & Beyer (2017) \doi{10.1158/1055-9965.EPI-16-0926}. 
This function provides the computation of \emph{LEx/Is} for any of the U.S. Census Bureau race/ethnicity subgroups (including Hispanic and non-Hispanic individuals). + +The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the aspatial computation. The yearly estimates are available for 2009 onward when ACS-5 data are available (2010 onward for \code{geo_large = 'cbsa'} and 2011 onward for \code{geo_large = 'csa'} or \code{geo_large = 'metro'}) but may be available from other U.S. Census Bureau surveys. The twenty racial/ethnic subgroups (U.S. Census Bureau definitions) are: +\itemize{ +\item \strong{B03002_002}: not Hispanic or Latino \code{'NHoL'} +\item \strong{B03002_003}: not Hispanic or Latino, white alone \code{'NHoLW'} +\item \strong{B03002_004}: not Hispanic or Latino, Black or African American alone \code{'NHoLB'} +\item \strong{B03002_005}: not Hispanic or Latino, American Indian and Alaska Native alone \code{'NHoLAIAN'} +\item \strong{B03002_006}: not Hispanic or Latino, Asian alone \code{'NHoLA'} +\item \strong{B03002_007}: not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'NHoLNHOPI'} +\item \strong{B03002_008}: not Hispanic or Latino, Some other race alone \code{'NHoLSOR'} +\item \strong{B03002_009}: not Hispanic or Latino, Two or more races \code{'NHoLTOMR'} +\item \strong{B03002_010}: not Hispanic or Latino, Two races including Some other race \code{'NHoLTRiSOR'} +\item \strong{B03002_011}: not Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'NHoLTReSOR'} +\item \strong{B03002_012}: Hispanic or Latino \code{'HoL'} +\item \strong{B03002_013}: Hispanic or Latino, white alone \code{'HoLW'} +\item \strong{B03002_014}: Hispanic or Latino, Black or African American alone \code{'HoLB'} +\item \strong{B03002_015}: Hispanic or Latino, American Indian and Alaska Native alone \code{'HoLAIAN'} 
+\item \strong{B03002_016}: Hispanic or Latino, Asian alone \code{'HoLA'} +\item \strong{B03002_017}: Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'HoLNHOPI'} +\item \strong{B03002_018}: Hispanic or Latino, Some other race alone \code{'HoLSOR'} +\item \strong{B03002_019}: Hispanic or Latino, Two or more races \code{'HoLTOMR'} +\item \strong{B03002_020}: Hispanic or Latino, Two races including Some other race \code{'HoLTRiSOR'} +\item \strong{B03002_021}: Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'HoLTReSOR'} +} + +Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. + +\emph{LEx/Is} is a measure of the probability that two individuals living within a specific smaller geography (e.g., census tract) of either different (i.e., exposure) or the same (i.e., isolation) racial/ethnic subgroup(s) will interact, assuming that individuals within a smaller geography are randomly mixed. \emph{LEx/Is} is standardized with a logit transformation and centered against an expected case that all races/ethnicities are evenly distributed across a larger geography. (Note: will adjust data by 0.025 if probabilities are zero, one, or undefined. The output will include a warning if adjusted. See \code{\link[car]{logit}} for additional details.) + +\emph{LEx/Is} can range from negative infinity to infinity. If \emph{LEx/Is} is zero then the estimated probability of the interaction between two people of the given subgroup(s) within a smaller geography is equal to the expected probability if the subgroup(s) were perfectly mixed in the larger geography. 
If \emph{LEx/Is} is greater than zero then the interaction is more likely to occur within the smaller geography than in the larger geography, and if \emph{LEx/Is} is less than zero then the interaction is less likely to occur within the smaller geography than in the larger geography. Note: the exponentiation of each \emph{LEx/Is} metric results in the odds ratio of the specific exposure or isolation of interest in a smaller geography relative to the larger geography. + +Larger geographies available include state \code{geo_large = 'state'}, county \code{geo_large = 'county'}, census tract \code{geo_large = 'tract'}, Core Based Statistical Area \code{geo_large = 'cbsa'}, Combined Statistical Area \code{geo_large = 'csa'}, and Metropolitan Division \code{geo_large = 'metro'} levels. Smaller geographies available include county \code{geo_small = 'county'}, census tract \code{geo_small = 'tract'}, and census block group \code{geo_small = 'block group'} levels. If a larger geographical area is comprised of only one smaller geographical area (e.g., a U.S. county contains only one census tract), then the \emph{LEx/Is} value returned is NA. If the larger geographical unit is Combined Statistical Areas \code{geo_large = 'csa'} or Core Based Statistical Areas \code{geo_large = 'cbsa'}, only the smaller geographical units completely within a larger geographical unit are considered in the \emph{LEx/Is} computation (see internal \code{\link[sf]{st_within}} function for more information), and we recommend specifying all states within which the larger geographical unit of interest is located using the internal \code{state} argument to ensure all appropriate smaller geographical units are included in the \emph{LEx/Is} computation. +} +\examples{ +\dontrun{ +# Wrapped in \dontrun{} because these examples require a Census API key. + + # Local Exposure and Isolation of non-Hispanic Black vs. 
non-Hispanic white populations + ## of census tracts within counties within Georgia, U.S.A., counties (2020) + bemanian_beyer( + geo_large = 'county', + geo_small = 'tract', + state = 'GA', + year = 2020, + subgroup = 'NHoLB', + subgroup_ixn = 'NHoLW' + ) + +} + +} +\seealso{ +\code{\link[tidycensus]{get_acs}} for additional arguments for geographic extent selection (i.e., \code{state} and \code{county}). +} diff --git a/man/bravo.Rd b/man/bravo.Rd index d53105b..1559273 100644 --- a/man/bravo.Rd +++ b/man/bravo.Rd @@ -7,7 +7,7 @@ bravo(geo = "tract", year = 2020, subgroup, quiet = FALSE, ...) } \arguments{ -\item{geo}{Character string specifying the geography of the data either census tracts \code{geo = "tract"} (the default) or counties \code{geo = "county"}.} +\item{geo}{Character string specifying the geography of the data either census tracts \code{geo = 'tract'} (the default) or counties \code{geo = 'county'}.} \item{year}{Numeric. The year to compute the estimate. The default is 2020, and the years 2009 onward are currently available.} @@ -21,40 +21,43 @@ bravo(geo = "tract", year = 2020, subgroup, quiet = FALSE, ...) An object of class 'list'. This is a named list with the following components: \describe{ -\item{\code{ei}}{An object of class 'tbl' for the GEOID, name, EI, and raw census values of specified census geographies.} -\item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute EI.} +\item{\code{ei}}{An object of class 'tbl' for the GEOID, name, \emph{EI}, and raw census values of specified census geographies.} +\item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute \emph{EI}.} } } \description{ Compute the spatial Educational Isolation Index (Bravo) of selected educational attainment category(ies). } \details{ -This function will compute the spatial Educational Isolation Index (EI) of U.S. 
census tracts or counties for a specified geographical extent (e.g., the entire U.S. or a single state) based on Bravo et al. (2021) \doi{10.3390/ijerph18179384} who originally designed the metric for the educational isolation of individual without a college degree. This function provides the computation of EI for any of the U.S. Census Bureau educational attainment levels. +This function will compute the spatial Educational Isolation Index (\emph{EI}) of U.S. census tracts or counties for a specified geographical extent (e.g., the entire U.S. or a single state) based on Bravo et al. (2021) \doi{10.3390/ijerph18179384} who originally designed the metric for the educational isolation of individuals without a college degree. This function provides the computation of \emph{EI} for any of the U.S. Census Bureau educational attainment levels. -The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the geospatial computation. The yearly estimates are available for 2009 onward when ACS-5 data are available but are available from other U.S. Census Bureau surveys. The five educational attainment levels (U.S. Census Bureau definitions) are: +The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the geospatial computation. The yearly estimates are available for 2009 onward when ACS-5 data are available but may be available from other U.S. Census Bureau surveys. The five educational attainment levels (U.S. 
Census Bureau definitions) are: \itemize{ -\item{B06009_002: }{Less than high school graduate "LtHS"} -\item{B06009_003: }{High school graduate (includes equivalency) "HSGiE"} -\item{B06009_004: }{Some college or associate's degree "SCoAD"} -\item{B06009_005: }{Bachelor's degree "BD"} -\item{B06009_006: }{Graduate or professional degree "GoPD"} +\item \strong{B06009_002}: Less than high school graduate \code{'LtHS'} +\item \strong{B06009_003}: High school graduate (includes equivalency) \code{'HSGiE'} +\item \strong{B06009_004}: Some college or associate's degree \code{'SCoAD'} +\item \strong{B06009_005}: Bachelor's degree \code{'BD'} +\item \strong{B06009_006}: Graduate or professional degree \code{'GoPD'} } -Note: If \code{year = 2009}, then the ACS-5 data (2005-2009) are from the "B15002" question. +Note: If \code{year = 2009}, then the ACS-5 data (2005-2009) are from the \strong{B15002} question. -Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. NOTE: Current version does not correct for edge effects (e.g., census geographies along the specified spatial extent border, coastline, or U.S.-Mexico / U.S.-Canada border) may have few neighboring census geographies, and EI values in these census geographies may be unstable. A stop-gap solution for the former source of edge effect is to compute the EI for neighboring census geographies (i.e., the states bordering a study area of interest) and then use the estimates of the study area of interest. +Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. 
NOTE: Current version does not correct for edge effects (e.g., census geographies along the specified spatial extent border, coastline, or U.S.-Mexico / U.S.-Canada border may have few neighboring census geographies), and \emph{EI} values in these census geographies may be unstable. A stop-gap solution for the former source of edge effect is to compute the \emph{EI} for neighboring census geographies (i.e., the states bordering a study area of interest) and then use the estimates of the study area of interest. -A census geography (and its neighbors) that has nearly all of its population with the specified educational attainment category (e.g., a Bachelor's degree or more) will have an EI value close to 1. In contrast, a census geography (and its neighbors) that is nearly none of its population with the specified educational attainment category (e.g., less than a Bachelor's degree) will have an EI value close to 0. +A census geography (and its neighbors) that has nearly all of its population with the specified educational attainment category (e.g., a Bachelor's degree or more) will have an \emph{EI} value close to 1. In contrast, a census geography (and its neighbors) that has nearly none of its population with the specified educational attainment category (e.g., less than a Bachelor's degree) will have an \emph{EI} value close to 0. } \examples{ \dontrun{ # Wrapped in \dontrun{} because these examples require a Census API key. 
- - # Tract-level metric (2020) - bravo(geo = "tract", state = "GA", year = 2020, subgroup = c("LtHS", "HSGiE")) - - # County-level metric (2020) - bravo(geo = "county", state = "GA", year = 2020, subgroup = c("LtHS", "HSGiE")) - + + # Educational Isolation Index of less than some college or associate's degree attainment + ## of census tracts within Georgia, U.S.A., counties (2020) + bravo( + geo = 'tract', + state = 'GA', + year = 2020, + subgroup = c('LtHS', 'HSGiE') + ) + } } diff --git a/man/duncan.Rd b/man/duncan.Rd index c92eb26..a55a080 100644 --- a/man/duncan.Rd +++ b/man/duncan.Rd @@ -16,9 +16,9 @@ duncan( ) } \arguments{ -\item{geo_large}{Character string specifying the larger geographical unit of the data. The default is counties \code{geo_large = "county"}.} +\item{geo_large}{Character string specifying the larger geographical unit of the data. The default is counties \code{geo_large = 'county'}.} -\item{geo_small}{Character string specifying the smaller geographical unit of the data. The default is census tracts \code{geo_large = "tract"}.} +\item{geo_small}{Character string specifying the smaller geographical unit of the data. The default is census tracts \code{geo_small = 'tract'}.} \item{year}{Numeric. The year to compute the estimate. The default is 2020, and the years 2009 onward are currently available.} @@ -36,56 +36,62 @@ duncan( An object of class 'list'. 
This is a named list with the following components: \describe{ -\item{\code{di}}{An object of class 'tbl' for the GEOID, name, and DI at specified larger census geographies.} -\item{\code{di_data}}{An object of class 'tbl' for the raw census values at specified smaller census geographies.} -\item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute DI} +\item{\code{di}}{An object of class 'tbl' for the GEOID, name, and \emph{D} at specified larger census geographies.} +\item{\code{d_data}}{An object of class 'tbl' for the raw census values at specified smaller census geographies.} +\item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute \emph{D}.} } } \description{ -Compute the aspatial Dissimilarity Index (Duncan) of selected racial/ethnic subgroup(s) and U.S. geographies +Compute the aspatial Dissimilarity Index (Duncan & Duncan) of selected racial/ethnic subgroup(s) and U.S. geographies } \details{ -This function will compute the aspatial Dissimilarity Index (DI) of selected racial/ethnic subgroups and U.S. geographies for a specified geographical extent (e.g., the entire U.S. or a single state) based on Duncan & Duncan (1955) \doi{10.2307/2088328}. This function provides the computation of DI for any of the U.S. Census Bureau race/ethnicity subgroups (including Hispanic and non-Hispanic individuals). +This function will compute the aspatial Dissimilarity Index (\emph{D}) of selected racial/ethnic subgroups and U.S. geographies for a specified geographical extent (e.g., the entire U.S. or a single state) based on Duncan & Duncan (1955) \doi{10.2307/2088328}. This function provides the computation of \emph{D} for any of the U.S. Census Bureau race/ethnicity subgroups (including Hispanic and non-Hispanic individuals). -The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. 
Census Bureau 5-year American Community Survey characteristics used for the aspatial computation. The yearly estimates are available for 2009 onward when ACS-5 data are available but are available from other U.S. Census Bureau surveys. The twenty racial/ethnic subgroups (U.S. Census Bureau definitions) are: +The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the aspatial computation. The yearly estimates are available for 2009 onward when ACS-5 data are available (2010 onward for \code{geo_large = 'cbsa'} and 2011 onward for \code{geo_large = 'csa'} or \code{geo_large = 'metro'}) but may be available from other U.S. Census Bureau surveys. The twenty racial/ethnic subgroups (U.S. Census Bureau definitions) are: \itemize{ -\item{B03002_002: }{not Hispanic or Latino "NHoL"} -\item{B03002_003: }{not Hispanic or Latino, white alone "NHoLW"} -\item{B03002_004: }{not Hispanic or Latino, Black or African American alone "NHoLB"} -\item{B03002_005: }{not Hispanic or Latino, American Indian and Alaska Native alone "NHoLAIAN"} -\item{B03002_006: }{not Hispanic or Latino, Asian alone "NHoLA"} -\item{B03002_007: }{not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone "NHoLNHOPI"} -\item{B03002_008: }{not Hispanic or Latino, Some other race alone "NHoLSOR"} -\item{B03002_009: }{not Hispanic or Latino, Two or more races "NHoLTOMR"} -\item{B03002_010: }{not Hispanic or Latino, Two races including Some other race "NHoLTRiSOR"} -\item{B03002_011: }{not Hispanic or Latino, Two races excluding Some other race, and three or more races "NHoLTReSOR"} -\item{B03002_012: }{Hispanic or Latino "HoL"} -\item{B03002_013: }{Hispanic or Latino, white alone "HoLW"} -\item{B03002_014: }{Hispanic or Latino, Black or African American alone "HoLB"} -\item{B03002_015: }{Hispanic or Latino, American Indian and Alaska Native alone "HoLAIAN"} -\item{B03002_016: }{Hispanic or Latino, 
Asian alone "HoLA"} -\item{B03002_017: }{Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone "HoLNHOPI"} -\item{B03002_018: }{Hispanic or Latino, Some other race alone "HoLSOR"} -\item{B03002_019: }{Hispanic or Latino, Two or more races "HoLTOMR"} -\item{B03002_020: }{Hispanic or Latino, Two races including Some other race "HoLTRiSOR"} -\item{B03002_021: }{Hispanic or Latino, Two races excluding Some other race, and three or more races "HoLTReSOR"} +\item \strong{B03002_002}: not Hispanic or Latino \code{'NHoL'} +\item \strong{B03002_003}: not Hispanic or Latino, white alone \code{'NHoLW'} +\item \strong{B03002_004}: not Hispanic or Latino, Black or African American alone \code{'NHoLB'} +\item \strong{B03002_005}: not Hispanic or Latino, American Indian and Alaska Native alone \code{'NHoLAIAN'} +\item \strong{B03002_006}: not Hispanic or Latino, Asian alone \code{'NHoLA'} +\item \strong{B03002_007}: not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'NHoLNHOPI'} +\item \strong{B03002_008}: not Hispanic or Latino, Some other race alone \code{'NHoLSOR'} +\item \strong{B03002_009}: not Hispanic or Latino, Two or more races \code{'NHoLTOMR'} +\item \strong{B03002_010}: not Hispanic or Latino, Two races including Some other race \code{'NHoLTRiSOR'} +\item \strong{B03002_011}: not Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'NHoLTReSOR'} +\item \strong{B03002_012}: Hispanic or Latino \code{'HoL'} +\item \strong{B03002_013}: Hispanic or Latino, white alone \code{'HoLW'} +\item \strong{B03002_014}: Hispanic or Latino, Black or African American alone \code{'HoLB'} +\item \strong{B03002_015}: Hispanic or Latino, American Indian and Alaska Native alone \code{'HoLAIAN'} +\item \strong{B03002_016}: Hispanic or Latino, Asian alone \code{'HoLA'} +\item \strong{B03002_017}: Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'HoLNHOPI'} +\item \strong{B03002_018}: 
Hispanic or Latino, Some other race alone \code{'HoLSOR'} +\item \strong{B03002_019}: Hispanic or Latino, Two or more races \code{'HoLTOMR'} +\item \strong{B03002_020}: Hispanic or Latino, Two races including Some other race \code{'HoLTRiSOR'} +\item \strong{B03002_021}: Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'HoLTReSOR'} } Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. -DI is a measure of the evenness of racial/ethnic residential segregation when comparing smaller geographical areas to larger ones within which the smaller geographical areas are located. DI can range in value from 0 to 1 and represents the proportion of racial/ethnic subgroup members that would have to change their area of residence to achieve an even distribution within the larger geographical area under conditions of maximum segregation. +\emph{D} is a measure of the evenness of racial/ethnic residential segregation when comparing smaller geographical areas to larger ones within which the smaller geographical areas are located. \emph{D} can range in value from 0 to 1 and represents the proportion of racial/ethnic subgroup members that would have to change their area of residence to achieve an even distribution within the larger geographical area under conditions of maximum segregation. -Larger geographies available include state \code{geo_large = "state"}, county \code{geo_large = "county"}, and census tract \code{geo_large = "tract"} levels. Smaller geographies available include, county \code{geo_small = "county"}, census tract \code{geo_small = "tract"}, and census block group \code{geo_small = "block group"} levels. If a larger geographical area is comprised of only one smaller geographical area (e.g., a U.S county contains only one census tract), then the DI value returned is NA. 
+Larger geographies available include state \code{geo_large = 'state'}, county \code{geo_large = 'county'}, census tract \code{geo_large = 'tract'}, Core Based Statistical Area \code{geo_large = 'cbsa'}, Combined Statistical Area \code{geo_large = 'csa'}, and Metropolitan Division \code{geo_large = 'metro'} levels. Smaller geographies available include county \code{geo_small = 'county'}, census tract \code{geo_small = 'tract'}, and census block group \code{geo_small = 'block group'} levels. If a larger geographical area is comprised of only one smaller geographical area (e.g., a U.S. county contains only one census tract), then the \emph{D} value returned is NA. If the larger geographical unit is Combined Statistical Areas \code{geo_large = 'csa'} or Core Based Statistical Areas \code{geo_large = 'cbsa'}, only the smaller geographical units completely within a larger geographical unit are considered in the \emph{D} computation (see internal \code{\link[sf]{st_within}} function for more information), and we recommend specifying all states within which the larger geographical unit of interest is located using the internal \code{state} argument to ensure all appropriate smaller geographical units are included in the \emph{D} computation. } \examples{ \dontrun{ # Wrapped in \dontrun{} because these examples require a Census API key. - + # Dissimilarity Index of non-Hispanic Black vs. 
non-Hispanic white populations - ## of census tracts within Georgia, U.S.A., counties (2020) - duncan(geo_large = "county", geo_small = "tract", state = "GA", year = 2020, - subgroup = "NHoLB", subgroup_ref = "NHoLW") - + ## of census tracts within counties within Georgia, U.S.A., counties (2020) + duncan( + geo_large = 'county', + geo_small = 'tract', + state = 'GA', + year = 2020, + subgroup = 'NHoLB', + subgroup_ref = 'NHoLW' + ) + } } diff --git a/man/figures/a.png b/man/figures/a.png new file mode 100644 index 0000000..5bf95ea Binary files /dev/null and b/man/figures/a.png differ diff --git a/man/figures/ai.png b/man/figures/ai.png deleted file mode 100644 index a7d7aec..0000000 Binary files a/man/figures/ai.png and /dev/null differ diff --git a/man/figures/d.png b/man/figures/d.png new file mode 100644 index 0000000..47c0275 Binary files /dev/null and b/man/figures/d.png differ diff --git a/man/figures/del.png b/man/figures/del.png new file mode 100644 index 0000000..02ef683 Binary files /dev/null and b/man/figures/del.png differ diff --git a/man/figures/di.png b/man/figures/di.png deleted file mode 100644 index 289cb5d..0000000 Binary files a/man/figures/di.png and /dev/null differ diff --git a/man/figures/ei.png b/man/figures/ei.png index 6c87c56..8808633 100644 Binary files a/man/figures/ei.png and b/man/figures/ei.png differ diff --git a/man/figures/g.png b/man/figures/g.png new file mode 100644 index 0000000..6a2301e Binary files /dev/null and b/man/figures/g.png differ diff --git a/man/figures/gini.png b/man/figures/gini.png deleted file mode 100644 index dd05541..0000000 Binary files a/man/figures/gini.png and /dev/null differ diff --git a/man/figures/ice1.png b/man/figures/ice1.png index 3884454..32f4d11 100644 Binary files a/man/figures/ice1.png and b/man/figures/ice1.png differ diff --git a/man/figures/ice2.png b/man/figures/ice2.png index 9660aa3..74c032c 100644 Binary files a/man/figures/ice2.png and b/man/figures/ice2.png differ diff --git 
a/man/figures/ice3.png b/man/figures/ice3.png index 056d56f..fcf89fd 100644 Binary files a/man/figures/ice3.png and b/man/figures/ice3.png differ diff --git a/man/figures/ice4.png b/man/figures/ice4.png index 5b4ddef..fd2d923 100644 Binary files a/man/figures/ice4.png and b/man/figures/ice4.png differ diff --git a/man/figures/ice5.png b/man/figures/ice5.png index 65c9ec7..db42245 100644 Binary files a/man/figures/ice5.png and b/man/figures/ice5.png differ diff --git a/man/figures/lexis.png b/man/figures/lexis.png new file mode 100644 index 0000000..7cf4751 Binary files /dev/null and b/man/figures/lexis.png differ diff --git a/man/figures/lq.png b/man/figures/lq.png new file mode 100644 index 0000000..f286e16 Binary files /dev/null and b/man/figures/lq.png differ diff --git a/man/figures/messer1.png b/man/figures/messer1.png index 7dfd8d6..84c8f8b 100644 Binary files a/man/figures/messer1.png and b/man/figures/messer1.png differ diff --git a/man/figures/messer2.png b/man/figures/messer2.png index 85e539b..db872d8 100644 Binary files a/man/figures/messer2.png and b/man/figures/messer2.png differ diff --git a/man/figures/powell_wiley1.png b/man/figures/powell_wiley1.png index 0d754d2..ac7d246 100644 Binary files a/man/figures/powell_wiley1.png and b/man/figures/powell_wiley1.png differ diff --git a/man/figures/powell_wiley2.png b/man/figures/powell_wiley2.png index 2f1045b..4d1434c 100644 Binary files a/man/figures/powell_wiley2.png and b/man/figures/powell_wiley2.png differ diff --git a/man/figures/powell_wiley3.png b/man/figures/powell_wiley3.png index b3a796e..efea5f7 100644 Binary files a/man/figures/powell_wiley3.png and b/man/figures/powell_wiley3.png differ diff --git a/man/figures/powell_wiley4.png b/man/figures/powell_wiley4.png index 69ff665..a361fff 100644 Binary files a/man/figures/powell_wiley4.png and b/man/figures/powell_wiley4.png differ diff --git a/man/figures/ri.png b/man/figures/ri.png index 992b59e..5b5e981 100644 Binary files a/man/figures/ri.png 
and b/man/figures/ri.png differ diff --git a/man/figures/sp.png b/man/figures/sp.png new file mode 100644 index 0000000..921cda5 Binary files /dev/null and b/man/figures/sp.png differ diff --git a/man/figures/v.png b/man/figures/v.png new file mode 100644 index 0000000..321964b Binary files /dev/null and b/man/figures/v.png differ diff --git a/man/figures/xpx_star.png b/man/figures/xpx_star.png new file mode 100644 index 0000000..b43c9ef Binary files /dev/null and b/man/figures/xpx_star.png differ diff --git a/man/figures/xpy_star.png b/man/figures/xpy_star.png new file mode 100644 index 0000000..70738d1 Binary files /dev/null and b/man/figures/xpy_star.png differ diff --git a/man/gini.Rd b/man/gini.Rd index d9b1976..ff99e1c 100644 --- a/man/gini.Rd +++ b/man/gini.Rd @@ -7,7 +7,7 @@ gini(geo = "tract", year = 2020, quiet = FALSE, ...) } \arguments{ -\item{geo}{Character string specifying the geography of the data either census tracts \code{geo = "tract"} (the default) or counties \code{geo = "county"}.} +\item{geo}{Character string specifying the geography of the data either census tracts \code{geo = 'tract'} (the default) or counties \code{geo = 'county'}.} \item{year}{Numeric. The year to compute the estimate. The default is 2020, and the years 2009 onward are currently available.} @@ -19,32 +19,30 @@ gini(geo = "tract", year = 2020, quiet = FALSE, ...) An object of class 'list'. This is a named list with the following components: \describe{ -\item{\code{gini}}{An object of class 'tbl' for the GEOID, name, and Gini index of specified census geographies.} -\item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for the Gini index.} +\item{\code{g}}{An object of class 'tbl' for the GEOID, name, and \emph{G} of specified census geographies.} +\item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for \emph{G}.} } } \description{ Retrieve the aspatial Gini Index of income inequality. 
} \details{ -This function will retrieve the aspatial Gini Index of U.S. census tracts or counties for a specified geographical extent (e.g., the entire U.S. or a single state) based on Gini (1921) \doi{10.2307/2223319}. +This function will retrieve the aspatial Gini Index (\emph{G}) of U.S. census tracts or counties for a specified geographical extent (e.g., the entire U.S. or a single state) based on Gini (1921) \doi{10.2307/2223319}. -The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey estimates of the Gini Index for income inequality (ACS: B19083). The estimates are available for 2009 onward when ACS-5 data are available but are available from other U.S. Census Bureau surveys. +The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey estimates of \emph{G} for income inequality (ACS: B19083). The estimates are available for 2009 onward when ACS-5 data are available but are available from other U.S. Census Bureau surveys. Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. -According to the U.S. Census Bureau \url{https://www.census.gov/topics/income-poverty/income-inequality/about/metrics/gini-index.html}: "The Gini Index is a summary measure of income inequality. The Gini coefficient incorporates the detailed shares data into a single statistic, which summarizes the dispersion of income across the entire income distribution. The Gini coefficient ranges from 0, indicating perfect equality (where everyone receives an equal share), to 1, perfect inequality (where only one recipient or group of recipients receives all the income). The Gini is based on the difference between the Lorenz curve (the observed cumulative income distribution) and the notion of a perfectly equal income distribution." +According to the U.S. 
Census Bureau \url{https://www.census.gov/topics/income-poverty/income-inequality/about/metrics/gini-index.html}: 'The Gini Index is a summary measure of income inequality. The Gini coefficient incorporates the detailed shares data into a single statistic, which summarizes the dispersion of income across the entire income distribution. The Gini coefficient ranges from 0, indicating perfect equality (where everyone receives an equal share), to 1, perfect inequality (where only one recipient or group of recipients receives all the income). The Gini is based on the difference between the Lorenz curve (the observed cumulative income distribution) and the notion of a perfectly equal income distribution.' } \examples{ \dontrun{ # Wrapped in \dontrun{} because these examples require a Census API key. - - # Tract-level metric (2020) - gini(geo = "tract", state = "GA", year = 2020) - - # County-level metric (2020) - gini(geo = "county", state = "GA", year = 2020) - + + # Gini Index of income inequality + ## of census tracts within Georgia, U.S.A. (2020) + gini(geo = 'tract', state = 'GA', year = 2020) + } } diff --git a/man/hoover.Rd b/man/hoover.Rd new file mode 100644 index 0000000..8c6d6e1 --- /dev/null +++ b/man/hoover.Rd @@ -0,0 +1,96 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/hoover.R +\name{hoover} +\alias{hoover} +\title{Delta based on Hoover (1941) and Duncan et al. (1961)} +\usage{ +hoover( + geo_large = "county", + geo_small = "tract", + year = 2020, + subgroup, + omit_NAs = TRUE, + quiet = FALSE, + ... +) +} +\arguments{ +\item{geo_large}{Character string specifying the larger geographical unit of the data. The default is counties \code{geo_large = 'county'}.} + +\item{geo_small}{Character string specifying the smaller geographical unit of the data. The default is census tracts \code{geo_small = 'tract'}.} + +\item{year}{Numeric. The year to compute the estimate. 
The default is 2020, and the years 2009 onward are currently available.} + +\item{subgroup}{Character string specifying the racial/ethnic subgroup(s). See Details for available choices.} + +\item{omit_NAs}{Logical. If FALSE, will compute index for a larger geographical unit only if all of its smaller geographical units have values. The default is TRUE.} + +\item{quiet}{Logical. If TRUE, will display messages about potential missing census information. The default is FALSE.} + +\item{...}{Arguments passed to \code{\link[tidycensus]{get_acs}} to select state, county, and other arguments for census characteristics} +} +\value{ +An object of class 'list'. This is a named list with the following components: + +\describe{ +\item{\code{del}}{An object of class 'tbl' for the GEOID, name, and \emph{DEL} at specified larger census geographies.} +\item{\code{del_data}}{An object of class 'tbl' for the raw census values at specified smaller census geographies.} +\item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute \emph{DEL}.} +} +} +\description{ +Compute the aspatial Delta (Hoover) of a selected racial/ethnic subgroup(s) and U.S. geographies. +} +\details{ +This function will compute the aspatial Delta (\emph{DEL}) of selected racial/ethnic subgroups and U.S. geographies for a specified geographical extent (e.g., the entire U.S. or a single state) based on Hoover (1941) \doi{10.1017/S0022050700052980} and Duncan, Cuzzort, and Duncan (1961; LC:60007089). This function provides the computation of \emph{DEL} for any of the U.S. Census Bureau race/ethnicity subgroups (including Hispanic and non-Hispanic individuals). + +The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the aspatial computation. 
The yearly estimates are available for 2009 onward when ACS-5 data are available (2010 onward for \code{geo_large = 'cbsa'} and 2011 onward for \code{geo_large = 'csa'} or \code{geo_large = 'metro'}) but may be available from other U.S. Census Bureau surveys. The twenty racial/ethnic subgroups (U.S. Census Bureau definitions) are: +\itemize{ +\item \strong{B03002_002}: not Hispanic or Latino \code{'NHoL'} +\item \strong{B03002_003}: not Hispanic or Latino, white alone \code{'NHoLW'} +\item \strong{B03002_004}: not Hispanic or Latino, Black or African American alone \code{'NHoLB'} +\item \strong{B03002_005}: not Hispanic or Latino, American Indian and Alaska Native alone \code{'NHoLAIAN'} +\item \strong{B03002_006}: not Hispanic or Latino, Asian alone \code{'NHoLA'} +\item \strong{B03002_007}: not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'NHoLNHOPI'} +\item \strong{B03002_008}: not Hispanic or Latino, Some other race alone \code{'NHoLSOR'} +\item \strong{B03002_009}: not Hispanic or Latino, Two or more races \code{'NHoLTOMR'} +\item \strong{B03002_010}: not Hispanic or Latino, Two races including Some other race \code{'NHoLTRiSOR'} +\item \strong{B03002_011}: not Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'NHoLTReSOR'} +\item \strong{B03002_012}: Hispanic or Latino \code{'HoL'} +\item \strong{B03002_013}: Hispanic or Latino, white alone \code{'HoLW'} +\item \strong{B03002_014}: Hispanic or Latino, Black or African American alone \code{'HoLB'} +\item \strong{B03002_015}: Hispanic or Latino, American Indian and Alaska Native alone \code{'HoLAIAN'} +\item \strong{B03002_016}: Hispanic or Latino, Asian alone \code{'HoLA'} +\item \strong{B03002_017}: Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'HoLNHOPI'} +\item \strong{B03002_018}: Hispanic or Latino, Some other race alone \code{'HoLSOR'} +\item \strong{B03002_019}: Hispanic or Latino, Two or more races 
\code{'HoLTOMR'} +\item \strong{B03002_020}: Hispanic or Latino, Two races including Some other race \code{'HoLTRiSOR'} +\item \strong{B03002_021}: Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'HoLTReSOR'} +} + +Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. + +\emph{DEL} is a measure of the proportion of members of one subgroup(s) residing in geographic units with above average density of members of the subgroup(s). The index provides the proportion of a subgroup population that would have to move across geographic units to achieve a uniform density. \emph{DEL} can range in value from 0 to 1. + +Larger geographies available include state \code{geo_large = 'state'}, county \code{geo_large = 'county'}, census tract \code{geo_large = 'tract'}, Core Based Statistical Area \code{geo_large = 'cbsa'}, Combined Statistical Area \code{geo_large = 'csa'}, and Metropolitan Division \code{geo_large = 'metro'} levels. Smaller geographies available include, county \code{geo_small = 'county'}, census tract \code{geo_small = 'tract'}, and census block group \code{geo_small = 'block group'} levels. If a larger geographical area is comprised of only one smaller geographical area (e.g., a U.S county contains only one census tract), then the \emph{DEL} value returned is NA. 
If the larger geographical unit is Combined Statistical Areas \code{geo_large = 'csa'} or Core Based Statistical Areas \code{geo_large = 'cbsa'}, only the smaller geographical units completely within a larger geographical unit are considered in the \emph{DEL} computation (see internal \code{\link[sf]{st_within}} function for more information), and we recommend specifying all states within which the larger geographical unit of interest is located using the internal \code{state} argument to ensure all appropriate smaller geographical units are included in the \emph{DEL} computation. +} +\examples{ +\dontrun{ +# Wrapped in \dontrun{} because these examples require a Census API key. + + # Delta (a measure of concentration) of non-Hispanic Black populations + ## of census tracts within counties within Georgia, U.S.A. (2020) + hoover( + geo_large = 'county', + geo_small = 'tract', + state = 'GA', + year = 2020, + subgroup = 'NHoLB' + ) + +} + +} +\seealso{ +\code{\link[tidycensus]{get_acs}} for additional arguments for geographic extent selection (i.e., \code{state} and \code{county}). +} diff --git a/man/krieger.Rd b/man/krieger.Rd index 43eb153..f9cb60e 100644 --- a/man/krieger.Rd +++ b/man/krieger.Rd @@ -7,7 +7,7 @@ krieger(geo = "tract", year = 2020, quiet = FALSE, ...) } \arguments{ -\item{geo}{Character string specifying the geography of the data either census tracts \code{geo = "tract"} (the default) or counties \code{geo = "county"}.} +\item{geo}{Character string specifying the geography of the data either census tracts \code{geo = 'tract'} (the default) or counties \code{geo = 'county'}.} \item{year}{Numeric. The year to compute the estimate. The default is 2020, and the years 2009 onward are currently available.} @@ -19,47 +19,47 @@ krieger(geo = "tract", year = 2020, quiet = FALSE, ...) An object of class 'list'. 
This is a named list with the following components: \describe{ -\item{\code{ice}}{An object of class 'tbl' for the GEOID, name, ICE metrics, and raw census values of specified census geographies.} -\item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute the ICEs.} +\item{\code{ice}}{An object of class 'tbl' for the GEOID, name, \emph{ICE} metrics, and raw census values of specified census geographies.} +\item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute the \emph{ICE} metrics.} } } \description{ Compute the aspatial Index of Concentration at the Extremes (Krieger). } \details{ -This function will compute three aspatial Index of Concentration at the Extremes (ICE) of U.S. census tracts or counties for a specified geographical extent (e.g., entire U.S. or a single state) based on Feldman et al. (2015) \doi{10.1136/jech-2015-205728} and Krieger et al. (2016) \doi{10.2105/AJPH.2015.302955}. The authors expanded the metric designed by Massey in a chapter of Booth & Crouter (2001) \doi{10.4324/9781410600141} who initially designed the metric for residential segregation. This function computes five ICE metrics: +This function will compute five aspatial Index of Concentration at the Extremes (\emph{ICE}) metrics of U.S. census tracts or counties for a specified geographical extent (e.g., entire U.S. or a single state) based on Feldman et al. (2015) \doi{10.1136/jech-2015-205728} and Krieger et al. (2016) \doi{10.2105/AJPH.2015.302955}. The authors expanded the metric designed by Massey in a chapter of Booth & Crouter (2001) \doi{10.4324/9781410600141} who initially designed the metric for residential segregation. This function computes five \emph{ICE} metrics: \itemize{ -\item{Income}{80th income percentile vs. 20th income percentile} -\item{Education}{less than high school vs. 
four-year college degree or more} -\item{Race/Ethnicity}{white non-Hispanic vs. black non-Hispanic} -\item{Income and race/ethnicity combined}{white non-Hispanic in 80th income percentile vs. black alone (including Hispanic) in 20th income percentile} -\item{Income and race/ethnicity combined}{white non-Hispanic in 80th income percentile vs. white non-Hispanic in 20th income percentile} +\item \strong{Income}: 80th income percentile vs. 20th income percentile +\item \strong{Education}: less than high school vs. four-year college degree or more +\item \strong{Race/Ethnicity}: white non-Hispanic vs. black non-Hispanic +\item \strong{Income and race/ethnicity combined}: white non-Hispanic in 80th income percentile vs. black alone (including Hispanic) in 20th income percentile +\item \strong{Income and race/ethnicity combined}: white non-Hispanic in 80th income percentile vs. white non-Hispanic in 20th income percentile } -The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the geospatial computation. The yearly estimates are available for 2009 onward when ACS-5 data are available but are available from other U.S. Census Bureau surveys. The ACS-5 groups used in the computation of the five ICE metrics are: +The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the geospatial computation. The yearly estimates are available for 2009 onward when ACS-5 data are available but are available from other U.S. Census Bureau surveys. 
The ACS-5 groups used in the computation of the five \emph{ICE} metrics are: \itemize{ -\item{B03002: }{HISPANIC OR LATINO ORIGIN BY RACE} -\item{B15002: }{SEX BY EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER} -\item{B19001: }{HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 20XX INFLATION-ADJUSTED DOLLARS)} -\item{B19001B: }{HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 20XX INFLATION-ADJUSTED DOLLARS) (BLACK OR AFRICAN AMERICAN ALONE HOUSEHOLDER)} -\item{B19001H: }{HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 20XX INFLATION-ADJUSTED DOLLARS) (WHITE ALONE, NOT HISPANIC OR LATINO HOUSEHOLDER)} +\item \strong{B03002}: HISPANIC OR LATINO ORIGIN BY RACE +\item \strong{B15002}: SEX BY EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER +\item \strong{B19001}: HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 20XX INFLATION-ADJUSTED DOLLARS) +\item \strong{B19001B}: HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 20XX INFLATION-ADJUSTED DOLLARS) (BLACK OR AFRICAN AMERICAN ALONE HOUSEHOLDER) +\item \strong{B19001H}: HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 20XX INFLATION-ADJUSTED DOLLARS) (WHITE ALONE, NOT HISPANIC OR LATINO HOUSEHOLDER) } Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. -ICE metrics can range in value from -1 (most deprived) to 1 (most privileged). A value of 0 can thus represent two possibilities: (1) none of the residents are in the most privileged or most deprived categories, or (2) an equal number of persons are in the most privileged and most deprived categories, and in both cases indicates that the area is not dominated by extreme concentrations of either of the two groups. +\emph{ICE} metrics can range in value from -1 (most deprived) to 1 (most privileged). 
A value of 0 can thus represent two possibilities: (1) none of the residents are in the most privileged or most deprived categories, or (2) an equal number of persons are in the most privileged and most deprived categories, and in both cases indicates that the area is not dominated by extreme concentrations of either of the two groups. } \examples{ \dontrun{ # Wrapped in \dontrun{} because these examples require a Census API key. - - # Tract-level metric (2020) - krieger(geo = "tract", state = "GA", year = 2020) - - # County-level metric (2020) - krieger(geo = "county", state = "GA", year = 2020) - + + # Tract-level metrics (2020) + krieger(geo = 'tract', state = 'GA', year = 2020) + + # County-level metrics (2020) + krieger(geo = 'county', state = 'GA', year = 2020) + } } diff --git a/man/lieberson.Rd b/man/lieberson.Rd new file mode 100644 index 0000000..99aa856 --- /dev/null +++ b/man/lieberson.Rd @@ -0,0 +1,96 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lieberson.R +\name{lieberson} +\alias{lieberson} +\title{Isolation Index based on Lieberson (1981) and Bell (1954)} +\usage{ +lieberson( + geo_large = "county", + geo_small = "tract", + year = 2020, + subgroup, + omit_NAs = TRUE, + quiet = FALSE, + ... +) +} +\arguments{ +\item{geo_large}{Character string specifying the larger geographical unit of the data. The default is counties \code{geo_large = 'county'}.} + +\item{geo_small}{Character string specifying the smaller geographical unit of the data. The default is census tracts \code{geo_small = 'tract'}.} + +\item{year}{Numeric. The year to compute the estimate. The default is 2020, and the years 2009 onward are currently available.} + +\item{subgroup}{Character string specifying the racial/ethnic subgroup(s). See Details for available choices.} + +\item{omit_NAs}{Logical. If FALSE, will compute index for a larger geographical unit only if all of its smaller geographical units have values. 
The default is TRUE.} + +\item{quiet}{Logical. If TRUE, will display messages about potential missing census information. The default is FALSE.} + +\item{...}{Arguments passed to \code{\link[tidycensus]{get_acs}} to select state, county, and other arguments for census characteristics} +} +\value{ +An object of class 'list'. This is a named list with the following components: + +\describe{ +\item{\code{xpx_star}}{An object of class 'tbl' for the GEOID, name, and \emph{xPx\*} at specified larger census geographies.} +\item{\code{xpx_star_data}}{An object of class 'tbl' for the raw census values at specified smaller census geographies.} +\item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute \emph{xPx\*}.} +} +} +\description{ +Compute the aspatial Isolation Index (Lieberson) of a selected racial/ethnic subgroup(s) and U.S. geographies. +} +\details{ +This function will compute the aspatial Isolation Index (\emph{xPx\*}) of selected racial/ethnic subgroups and U.S. geographies for a specified geographical extent (e.g., the entire U.S. or a single state) based on Lieberson (1981; ISBN-13:978-1-032-53884-6) and Bell (1954) \doi{10.2307/2574118}. This function provides the computation of \emph{xPx\*} for any of the U.S. Census Bureau race/ethnicity subgroups (including Hispanic and non-Hispanic individuals). + +The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the aspatial computation. The yearly estimates are available for 2009 onward when ACS-5 data are available (2010 onward for \code{geo_large = 'cbsa'} and 2011 onward for \code{geo_large = 'csa'} or \code{geo_large = 'metro'}) but may be available from other U.S. Census Bureau surveys. The twenty racial/ethnic subgroups (U.S. 
Census Bureau definitions) are: +\itemize{ +\item \strong{B03002_002}: not Hispanic or Latino \code{'NHoL'} +\item \strong{B03002_003}: not Hispanic or Latino, white alone \code{'NHoLW'} +\item \strong{B03002_004}: not Hispanic or Latino, Black or African American alone \code{'NHoLB'} +\item \strong{B03002_005}: not Hispanic or Latino, American Indian and Alaska Native alone \code{'NHoLAIAN'} +\item \strong{B03002_006}: not Hispanic or Latino, Asian alone \code{'NHoLA'} +\item \strong{B03002_007}: not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'NHoLNHOPI'} +\item \strong{B03002_008}: not Hispanic or Latino, Some other race alone \code{'NHoLSOR'} +\item \strong{B03002_009}: not Hispanic or Latino, Two or more races \code{'NHoLTOMR'} +\item \strong{B03002_010}: not Hispanic or Latino, Two races including Some other race \code{'NHoLTRiSOR'} +\item \strong{B03002_011}: not Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'NHoLTReSOR'} +\item \strong{B03002_012}: Hispanic or Latino \code{'HoL'} +\item \strong{B03002_013}: Hispanic or Latino, white alone \code{'HoLW'} +\item \strong{B03002_014}: Hispanic or Latino, Black or African American alone \code{'HoLB'} +\item \strong{B03002_015}: Hispanic or Latino, American Indian and Alaska Native alone \code{'HoLAIAN'} +\item \strong{B03002_016}: Hispanic or Latino, Asian alone \code{'HoLA'} +\item \strong{B03002_017}: Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'HoLNHOPI'} +\item \strong{B03002_018}: Hispanic or Latino, Some other race alone \code{'HoLSOR'} +\item \strong{B03002_019}: Hispanic or Latino, Two or more races \code{'HoLTOMR'} +\item \strong{B03002_020}: Hispanic or Latino, Two races including Some other race \code{'HoLTRiSOR'} +\item \strong{B03002_021}: Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'HoLTReSOR'} +} + +Use the internal \code{state} and \code{county} 
arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. + +\emph{xPx\*} is a measure of the probability that a member of one subgroup(s) will meet or interact with a member of their subgroup(s) with higher values signifying higher probability of interaction within the subgroup(s) (more isolation). \emph{xPx\*} can range in value from 0 to 1. + +Larger geographies available include state \code{geo_large = 'state'}, county \code{geo_large = 'county'}, census tract \code{geo_large = 'tract'}, Core Based Statistical Area \code{geo_large = 'cbsa'}, Combined Statistical Area \code{geo_large = 'csa'}, and Metropolitan Division \code{geo_large = 'metro'} levels. Smaller geographies available include county \code{geo_small = 'county'}, census tract \code{geo_small = 'tract'}, and census block group \code{geo_small = 'block group'} levels. If a larger geographical area is comprised of only one smaller geographical area (e.g., a U.S. county contains only one census tract), then the \emph{xPx\*} value returned is NA. If the larger geographical unit is Combined Statistical Areas \code{geo_large = 'csa'} or Core Based Statistical Areas \code{geo_large = 'cbsa'}, only the smaller geographical units completely within a larger geographical unit are considered in the \emph{xPx\*} computation (see internal \code{\link[sf]{st_within}} function for more information), and we recommend specifying all states within which the larger geographical unit of interest is located using the internal \code{state} argument to ensure all appropriate smaller geographical units are included in the \emph{xPx\*} computation. +} +\examples{ +\dontrun{ +# Wrapped in \dontrun{} because these examples require a Census API key. + + # Isolation of non-Hispanic Black populations
+ ## of census tracts within counties within Georgia, U.S.A. (2020) + lieberson( + geo_large = 'county', + geo_small = 'tract', + state = 'GA', + year = 2020, + subgroup = 'NHoLB' + ) + +} + +} +\seealso{ +\code{\link[tidycensus]{get_acs}} for additional arguments for geographic extent selection (i.e., \code{state} and \code{county}). +} diff --git a/man/messer.Rd b/man/messer.Rd index 73b3725..a64f52a 100644 --- a/man/messer.Rd +++ b/man/messer.Rd @@ -15,7 +15,7 @@ messer( ) } \arguments{ -\item{geo}{Character string specifying the geography of the data either census tracts \code{geo = "tract"} (the default) or counties \code{geo = "county"}.} +\item{geo}{Character string specifying the geography of the data either census tracts \code{geo = 'tract'} (the default) or counties \code{geo = 'county'}.} \item{year}{Numeric. The year to compute the estimate. The default is 2020, and the years 2010 onward are currently available.} @@ -23,7 +23,7 @@ messer( \item{quiet}{Logical. If TRUE, will display messages about potential missing census information and the proportion of variance explained by principal component analysis. The default is FALSE.} -\item{round_output}{Logical. If TRUE, will round the output of raw census and NDI values from the \code{\link[tidycensus]{get_acs}} at one and four significant digits, respectively. The default is FALSE.} +\item{round_output}{Logical. If TRUE, will round the output of raw census and \emph{NDI} values from the \code{\link[tidycensus]{get_acs}} at one and four significant digits, respectively. The default is FALSE.} \item{df}{Optional. Pass a pre-formatted \code{'dataframe'} or \code{'tibble'} with the desired variables through the function. Bypasses the data obtained by \code{\link[tidycensus]{get_acs}}. The default is NULL. See Details below.} @@ -33,33 +33,33 @@ messer( An object of class 'list'. 
This is a named list with the following components: \describe{ -\item{\code{ndi}}{An object of class 'tbl' for the GEOID, name, NDI (standardized), NDI (quartile), and raw census values of specified census geographies.} -\item{\code{pca}}{An object of class 'principal', returns the output of \code{\link[psych]{principal}} used to compute the NDI values.} -\item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute NDI.} +\item{\code{ndi}}{An object of class 'tbl' for the GEOID, name, \emph{NDI} (standardized), \emph{NDI} (quartile), and raw census values of specified census geographies.} +\item{\code{pca}}{An object of class 'principal', returns the output of \code{\link[psych]{principal}} used to compute the \emph{NDI} values.} +\item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute \emph{NDI}.} } } \description{ Compute the aspatial Neighborhood Deprivation Index (Messer). } \details{ -This function will compute the aspatial Neighborhood Deprivation Index (NDI) of U.S. census tracts or counties for a specified geographical referent (e.g., US-standardized) based on Messer et al. (2006) \doi{10.1007/s11524-006-9094-x}. +This function will compute the aspatial Neighborhood Deprivation Index (\emph{NDI}) of U.S. census tracts or counties for a specified geographical referent (e.g., US-standardized) based on Messer et al. (2006) \doi{10.1007/s11524-006-9094-x}. The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for computation involving a principal component analysis with the \code{\link[psych]{principal}} function. The yearly estimates are available for 2010 and after when all census characteristics became available. 
The eight characteristics are: \itemize{ -\item{C24030: }{percent males in management, science, and arts occupation} -\item{B25014: }{percent of crowded housing} -\item{B17017: }{percent of households in poverty} -\item{B25115: }{percent of female headed households with dependents} -\item{B19058: }{percent of households on public assistance} -\item{B19001: }{percent of households earning <$30,000 per year} -\item{B06009: }{percent earning less than a high school education} -\item{B23025: }{percent unemployed (2011 onward)} -\item{B23001: }{percent unemployed (2010 only)} +\item \strong{OCC (C24030)}: percent males in management, science, and arts occupation +\item \strong{CWD (B25014)}: percent of crowded housing +\item \strong{POV (B17017)}: percent of households in poverty +\item \strong{FHH (B25115)}: percent of female headed households with dependents +\item \strong{PUB (B19058)}: percent of households on public assistance +\item \strong{U30 (B19001)}: percent of households earning <$30,000 per year +\item \strong{EDU (B06009)}: percent earning less than a high school education +\item \strong{EMP (B23025)}: percent unemployed (2011 onward) +\item \strong{EMP (B23001)}: percent unemployed (2010 only) } -Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify the referent for standardizing the NDI (Messer) values. For example, if all U.S. states are specified for the \code{state} argument, then the output would be a U.S.-standardized index. +Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify the referent for standardizing the \emph{NDI} (Messer) values. For example, if all U.S. states are specified for the \code{state} argument, then the output would be a U.S.-standardized index. 
-The continuous NDI (Messer) values are z-transformed, i.e., "standardized," and the categorical NDI (Messer) values are quartiles of the standardized continuous NDI (Messer) values. +The continuous \emph{NDI} (Messer) values are z-transformed, i.e., 'standardized,' and the categorical \emph{NDI} (Messer) values are quartiles of the standardized continuous \emph{NDI} (Messer) values. Check if the proportion of variance explained by the first principal component is high (more than 0.5). @@ -72,14 +72,11 @@ messer(df = DCtracts2020[ , c(1, 3:10)]) \dontrun{ # Wrapped in \dontrun{} because these examples require a Census API key. - # Tract-level metric (2020) - messer(geo = "tract", state = "GA", year = 2020) + # Tract-level NDI (Messer; 2020) + messer(geo = 'tract', state = 'GA', year = 2020) - # Impute NDI for tracts (2020) with missing census information (median values) - messer(state = "tract", "GA", year = 2020, imp = TRUE) - - # County-level metric (2020) - messer(geo = "county", state = "GA", year = 2020) + # Impute NDI (Messer; 2020) for tracts with missing census information (median values) + messer(geo = 'tract', state = 'GA', year = 2020, imp = TRUE) } diff --git a/man/ndi-package.Rd b/man/ndi-package.Rd index e81e0fd..0ee32e6 100644 --- a/man/ndi-package.Rd +++ b/man/ndi-package.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/package.R +% Please edit documentation in R/ndi-package.R \docType{package} \name{ndi-package} \alias{ndi-package} @@ -9,39 +9,61 @@ Computes various metrics of socio-economic deprivation and disparity in the United States based on information available from the U.S. Census Bureau. } \details{ -The 'ndi' package computes various metrics of socio-economic deprivation and disparity in the United States. 
Some metrics are considered "spatial" because they consider the values of neighboring (i.e., adjacent) census geographies in their computation, while other metrics are "aspatial" because they only consider the value within each census geography. Two types of aspatial neighborhood deprivation indices (NDI) are available: (1) based on Messer et al. (2006) \doi{10.1007/s11524-006-9094-x} and (2) based on Andrews et al. (2020) \doi{10.1080/17445647.2020.1750066} and Slotman et al. (2022) \doi{10.1016/j.dib.2022.108002} who use variables chosen by Roux and Mair (2010) \doi{10.1111/j.1749-6632.2009.05333.x}. Both are a decomposition of multiple demographic characteristics from the U.S. Census Bureau American Community Survey 5-year estimates (ACS-5; 2006-2010 onward). Using data from the ACS-5 (2005-2009 onward), the package can also (1) compute the spatial Racial Isolation Index (RI) based on Anthopolos et al. (2011) \doi{10.1016/j.sste.2011.06.002}, (2) compute the spatial Educational Isolation Index (EI) based on Bravo et al. (2021) \doi{10.3390/ijerph18179384}, (3) compute the aspatial Index of Concentration at the Extremes (ICE) based on Feldman et al. (2015) \doi{10.1136/jech-2015-205728} and Krieger et al. (2016) \doi{10.2105/AJPH.2015.302955}, (4) compute the aspatial Atkinson Index based on Atkinson (1970) \doi{10.1016/0022-0531(70)90039-6}, (5) compute the aspatial Dissimilarity Index based on Duncan & Duncan (1955) \doi{10.2307/2088328}, and (6) retrieve the aspatial Gini Index based on Gini (1921) \doi{10.2307/2223319}. +The 'ndi' package computes various metrics of socio-economic deprivation and disparity in the United States. Some metrics are considered "spatial" because they consider the values of neighboring (i.e., adjacent) census geographies in their computation, while other metrics are "aspatial" because they only consider the value within each census geography. 
Two types of aspatial neighborhood deprivation indices (\emph{NDI}) are available: (1) based on Messer et al. (2006) \doi{10.1007/s11524-006-9094-x} and (2) based on Andrews et al. (2020) \doi{10.1080/17445647.2020.1750066} and Slotman et al. (2022) \doi{10.1016/j.dib.2022.108002} who use variables chosen by Roux and Mair (2010) \doi{10.1111/j.1749-6632.2009.05333.x}. Both are a decomposition of multiple demographic characteristics from the U.S. Census Bureau American Community Survey 5-year estimates (ACS-5; 2006-2010 onward). Using data from the ACS-5 (2005-2009 onward), the package can also compute the (1) spatial Racial Isolation Index (\emph{RI}) based on Anthopolos et al. (2011) \doi{10.1016/j.sste.2011.06.002}, (2) spatial Educational Isolation Index (\emph{EI}) based on Bravo et al. (2021) \doi{10.3390/ijerph18179384}, (3) aspatial Index of Concentration at the Extremes (\emph{ICE}) based on Feldman et al. (2015) \doi{10.1136/jech-2015-205728} and Krieger et al. (2016) \doi{10.2105/AJPH.2015.302955}, (4) aspatial racial/ethnic Dissimilarity Index (\emph{DI}) based on Duncan & Duncan (1955) \doi{10.2307/2088328}, (5) aspatial income or racial/ethnic Atkinson Index (\emph{AI}) based on Atkinson (1970) \doi{10.1016/0022-0531(70)90039-6}, (6) aspatial racial/ethnic Isolation Index (\emph{II}) based on Shevky & Williams (1949; ISBN-13:978-0-837-15637-8) and Bell (1954) \doi{10.2307/2574118}, (7) aspatial racial/ethnic Correlation Ratio (\emph{V}) based on Bell (1954) \doi{10.2307/2574118} and White (1986) \doi{10.2307/3644339}, (8) aspatial racial/ethnic Location Quotient (\emph{LQ}) based on Merton (1939) \doi{10.2307/2084686} and Sudano et al. (2013) \doi{10.1016/j.healthplace.2012.09.015}, (9) aspatial racial/ethnic Local Exposure and Isolation (\emph{LEx/Is}) metric based on Bemanian & Beyer (2017) \doi{10.1158/1055-9965.EPI-16-0926}, (10) aspatial racial/ethnic Delta (\emph{DEL}) based on Hoover (1941) \doi{10.1017/S0022050700052980} and Duncan et al. 
(1961; LC:60007089), and (11) an index of spatial proximity (\emph{SP}) based on White (1986) \doi{10.2307/3644339} and Blau (1977; ISBN-13:978-0-029-03660-0). Also using data from the ACS-5 (2005-2009 onward), the package can retrieve the aspatial Gini Index (\emph{G}) based on Gini (1921) \doi{10.2307/2223319}. Key content of the 'ndi' package include:\cr \bold{Metrics of Socio-Economic Deprivation and Disparity} -\code{\link{anthopolos}} Computes the spatial Racial Isolation Index (RI) based on Anthopolos (2011) \doi{10.1016/j.sste.2011.06.002}. +\code{\link{anthopolos}} Computes the spatial Racial Isolation Index (\emph{RI}) based on Anthopolos (2011) \doi{10.1016/j.sste.2011.06.002}. -\code{\link{atkinson}} Computes the aspatial Atkinson Index (AI) based on Atkinson (1970) \doi{10.1016/0022-0531(70)90039-6}. +\code{\link{atkinson}} Computes the aspatial income or racial/ethnic Atkinson Index (\emph{A}) based on Atkinson (1970) \doi{10.1016/0022-0531(70)90039-6}. -\code{\link{bravo}} Computes the spatial Educational Isolation Index (EI) based on Bravo (2021) \doi{10.3390/ijerph18179384}. +\code{\link{bell}} Computes the aspatial racial/ethnic Interaction Index (\emph{xPy\*}) based on Shevky & Williams (1949; ISBN-13:978-0-837-15637-8) and Bell (1954) \doi{10.2307/2574118}. -\code{\link{duncan}} Computes the aspatial racial/ethnic Dissimilarity Index based on Duncan & Duncan (1955) \doi{10.2307/2088328}. +\code{\link{bemanian_beyer}} Computes the aspatial racial/ethnic Local Exposure and Isolation (\emph{LEx/Is}) metric based on Bemanian & Beyer (2017) \doi{10.1158/1055-9965.EPI-16-0926}. -\code{\link{gini}} Retrieves the aspatial Gini Index based on Gini (1921) \doi{10.2307/2223319}. +\code{\link{bravo}} Computes the spatial Educational Isolation Index (\emph{EI}) based on Bravo (2021) \doi{10.3390/ijerph18179384}. + +\code{\link{duncan}} Computes the aspatial racial/ethnic Dissimilarity Index (\emph{D}) based on Duncan & Duncan (1955) \doi{10.2307/2088328}. 
+ +\code{\link{gini}} Retrieves the aspatial Gini Index (\emph{G}) based on Gini (1921) \doi{10.2307/2223319}. + +\code{\link{hoover}} Computes the aspatial racial/ethnic Delta (\emph{DEL}) based on Hoover (1941) \doi{10.1017/S0022050700052980} and Duncan et al. (1961; LC:60007089). \code{\link{krieger}} Computes the aspatial Index of Concentration at the Extremes based on Feldman et al. (2015) \doi{10.1136/jech-2015-205728} and Krieger et al. (2016) \doi{10.2105/AJPH.2015.302955}. -\code{\link{messer}} Computes the aspatial Neighborhood Deprivation Index (NDI) based on Messer et al. (2006) \doi{10.1007/s11524-006-9094-x}. +\code{\link{lieberson}} Computes the aspatial racial/ethnic Isolation Index (\emph{xPx\*}) based on Lieberson (1981; ISBN-13:978-1-032-53884-6) and Bell (1954) \doi{10.2307/2574118}. + +\code{\link{messer}} Computes the aspatial Neighborhood Deprivation Index (\emph{NDI}) based on Messer et al. (2006) \doi{10.1007/s11524-006-9094-x}. + +\code{\link{powell_wiley}} Computes the aspatial Neighborhood Deprivation Index (\emph{NDI}) based on Andrews et al. (2020) \doi{10.1080/17445647.2020.1750066} and Slotman et al. (2022) \doi{10.1016/j.dib.2022.108002} who use variables chosen by Roux and Mair (2010) \doi{10.1111/j.1749-6632.2009.05333.x}. -\code{\link{powell_wiley}} Computes the aspatial Neighborhood Deprivation Index (NDI) based on Andrews et al. (2020) \doi{10.1080/17445647.2020.1750066} and Slotman et al. (2022) \doi{10.1016/j.dib.2022.108002} who use variables chosen by Roux and Mair (2010) \doi{10.1111/j.1749-6632.2009.05333.x}. +\code{\link{sudano}} Computes the aspatial racial/ethnic Location Quotient (\emph{LQ}) based on Merton (1939) \doi{10.2307/2084686} and Sudano et al. (2013) \doi{10.1016/j.healthplace.2012.09.015}. + +\code{\link{white}} Computes the aspatial racial/ethnic Correlation Ratio (\emph{V}) based on Bell (1954) \doi{10.2307/2574118} and White (1986) \doi{10.2307/3644339}.
+ +\code{\link{white_blau}} Computes an index of spatial proximity (\emph{SP}) based on White (1986) \doi{10.2307/3644339} and Blau (1977; ISBN-13:978-0-029-03660-0). \bold{Pre-formatted U.S. Census Data} \code{\link{DCtracts2020}} A sample dataset containing information about U.S. Census American Community Survey 5-year estimate data for the District of Columbia census tracts (2020). The data are obtained from the \code{\link[tidycensus]{get_acs}} function and formatted for the \code{\link{messer}} and \code{\link{powell_wiley}} functions input. } \section{Dependencies}{ - The 'ndi' package relies heavily upon \code{\link{tidycensus}} to retrieve data from the U.S. Census Bureau American Community Survey five-year estimates and the \code{\link{psych}} for computing the neighborhood deprivation indices. The \code{\link{messer}} function builds upon code developed by Hruska et al. (2022) \doi{10.17605/OSF.IO/M2SAV} by fictionalizing, adding the percent of households earning <$30,000 per year to the NDI computation, and providing the option for computing the ACS-5 2006-2010 NDI values. There is no code companion to compute NDI included in Andrews et al. (2020) \doi{10.1080/17445647.2020.1750066} or Slotman et al. (2022) \doi{10.1016/j.dib.2022.108002}, but the package author worked directly with the Slotman et al. (2022) \doi{10.1016/j.dib.2022.108002} authors to replicate their SAS code in R. The spatial metrics RI and EI rely on the \code{\link{sf}} and \code{\link{Matrix}} packages to compute the geospatial adjacency matrix between census geographies. Internal function to calculate AI is based on \code{\link[DescTools]{Atkinson}} function. There is no code companion to compute RI, EI, or DI included in Anthopolos et al. (2011) \doi{10.1016/j.sste.2011.06.002}, Bravo et al. (2021) \doi{10.3390/ijerph18179384}, or Duncan & Duncan (1955) \doi{10.2307/2088328}, respectively. 
+ The 'ndi' package relies heavily upon \code{\link{tidycensus}} to retrieve data from the U.S. Census Bureau American Community Survey five-year estimates and the \code{\link{psych}} for computing the neighborhood deprivation indices. The \code{\link{messer}} function builds upon code developed by Hruska et al. (2022) \doi{10.17605/OSF.IO/M2SAV} by functionalizing, adding the percent of households earning <$30,000 per year to the NDI computation, and providing the option for computing the ACS-5 2006-2010 NDI values. There is no code companion to compute NDI included in Andrews et al. (2020) \doi{10.1080/17445647.2020.1750066} or Slotman et al. (2022) \doi{10.1016/j.dib.2022.108002}, but the package author worked directly with the Slotman et al. (2022) \doi{10.1016/j.dib.2022.108002} authors to replicate their SAS code in R. The spatial metrics RI and EI rely on the \code{\link{sf}} and \code{\link{Matrix}} packages to compute the geospatial adjacency matrix between census geographies. Internal function to calculate AI is based on \code{\link[DescTools]{Atkinson}} function. There is no code companion to compute RI, EI, DI, II, V, LQ, or LEx/Is included in Anthopolos et al. (2011) \doi{10.1016/j.sste.2011.06.002}, Bravo et al. (2021) \doi{10.3390/ijerph18179384}, Duncan & Duncan (1955) \doi{10.2307/2088328}, Bell (1954) \doi{10.2307/2574118}, White (1986) \doi{10.2307/3644339}, Sudano et al. (2013) \doi{10.1016/j.healthplace.2012.09.015}, or Bemanian & Beyer (2017) \doi{10.1158/1055-9965.EPI-16-0926}, respectively. } +\seealso{ +Useful links: +\itemize{ + \item \url{https://github.com/idblr/ndi} + \item Report bugs at \url{https://github.com/idblr/ndi/issues} +} + +} \author{ -Ian D.
Buller\cr \emph{Social & Scientific Systems, Inc., a division of DLH Corporation, Silver Spring, Maryland, USA (current); Occupational and Environmental Epidemiology Branch, Division of Cancer Epidemiology and Genetics, National Cancer Institute, National Institutes of Health, Rockville, Maryland, USA (original).} \cr +Ian D. Buller\cr \emph{DLH Corporation (formerly Social & Scientific Systems, Inc.), Bethesda, Maryland, USA (current); Occupational and Environmental Epidemiology Branch, Division of Cancer Epidemiology and Genetics, National Cancer Institute, National Institutes of Health, Rockville, Maryland, USA (original).} \cr Maintainer: I.D.B. \email{ian.buller@alumni.emory.edu} } -\keyword{package} +\keyword{internal} diff --git a/man/powell_wiley.Rd b/man/powell_wiley.Rd index ec1b26d..ab6bdbd 100644 --- a/man/powell_wiley.Rd +++ b/man/powell_wiley.Rd @@ -15,7 +15,7 @@ powell_wiley( ) } \arguments{ -\item{geo}{Character string specifying the geography of the data either census tracts \code{geo = "tract"} (the default) or counties \code{geo = "county"}.} +\item{geo}{Character string specifying the geography of the data either census tracts \code{geo = 'tract'} (the default) or counties \code{geo = 'county'}.} \item{year}{Numeric. The year to compute the estimate. The default is 2020, and the years 2010 onward are currently available.} @@ -23,7 +23,7 @@ powell_wiley( \item{quiet}{Logical. If TRUE, will display messages about potential missing census information, standardized Cronbach's alpha, and proportion of variance explained by principal component analysis. The default is FALSE.} -\item{round_output}{Logical. If TRUE, will round the output of raw census and NDI values from the \code{\link[tidycensus]{get_acs}} at one and four significant digits, respectively. The default is FALSE.} +\item{round_output}{Logical. 
If TRUE, will round the output of raw census and \emph{NDI} values from the \code{\link[tidycensus]{get_acs}} at one and four significant digits, respectively. The default is FALSE.} \item{df}{Optional. Pass a pre-formatted \code{'dataframe'} or \code{'tibble'} with the desired variables through the function. Bypasses the data obtained by \code{\link[tidycensus]{get_acs}}. The default is NULL. See Details below.} @@ -33,9 +33,9 @@ powell_wiley( An object of class 'list'. This is a named list with the following components: \describe{ -\item{\code{ndi}}{An object of class 'tbl' for the GEOID, name, NDI continuous, NDI quintiles, and raw census values of specified census geographies.} -\item{\code{pca}}{An object of class 'principal', returns the output of \code{\link[psych]{principal}} used to compute the NDI values.} -\item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute NDI.} +\item{\code{ndi}}{An object of class 'tbl' for the GEOID, name, \emph{NDI} continuous, \emph{NDI} quintiles, and raw census values of specified census geographies.} +\item{\code{pca}}{An object of class 'principal', returns the output of \code{\link[psych]{principal}} used to compute the \emph{NDI} values.} +\item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute \emph{NDI}.} \item{\code{cronbach}}{An object of class 'character' or 'numeric' for the results of the Cronbach's alpha calculation. If only one factor is computed, a message is returned. If more than one factor is computed, Cronbach's alpha is calculated and should check that it is >0.7 for respectable internal consistency between factors.} } } @@ -43,28 +43,28 @@ An object of class 'list'. This is a named list with the following components: Compute the aspatial Neighborhood Deprivation Index (Powell-Wiley). 
} \details{ -This function will compute the aspatial Neighborhood Deprivation Index (NDI) of U.S. census tracts or counties for a specified geographical referent (e.g., US-standardized) based on Andrews et al. (2020) \doi{10.1080/17445647.2020.1750066} and Slotman et al. (2022) \doi{10.1016/j.dib.2022.108002}. +This function will compute the aspatial Neighborhood Deprivation Index (\emph{NDI}) of U.S. census tracts or counties for a specified geographical referent (e.g., US-standardized) based on Andrews et al. (2020) \doi{10.1080/17445647.2020.1750066} and Slotman et al. (2022) \doi{10.1016/j.dib.2022.108002}. The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for computation involving a factor analysis with the \code{\link[psych]{principal}} function. The yearly estimates are available in 2010 and after when all census characteristics became available. The thirteen characteristics chosen by Roux and Mair (2010) \doi{10.1111/j.1749-6632.2009.05333.x} are: \itemize{ -\item{MedHHInc (5B19013): }{median household income (dollars)} -\item{PctRecvIDR (B19054): }{percent of households receiving dividends, interest, or rental income} -\item{PctPubAsst (B19058): }{percent of households receiving public assistance} -\item{MedHomeVal (B25077): }{median home value (dollars)} -\item{PctMgmtBusScArti (C24060): }{percent in a management, business, science, or arts occupation} -\item{PctFemHeadKids (B11005): }{percent of households that are female headed with any children under 18 years} -\item{PctOwnerOcc (DP04): }{percent of housing units that are owner occupied} -\item{PctNoPhone (DP04): }{percent of households without a telephone} -\item{PctNComPlm (DP04): }{percent of households without complete plumbing facilities} -\item{PctEducHSPlus (S1501): }{percent with a high school degree or higher (population 25 years and over)} -\item{PctEducBchPlus (S1501): }{percent with a college 
degree or higher (population 25 years and over)} -\item{PctFamBelowPov (S1702): }{percent of families with incomes below the poverty level} -\item{PctUnempl (S2301): }{percent unemployed} +\item \strong{MedHHInc (B19013)}: median household income (dollars) +\item \strong{PctRecvIDR (B19054)}: percent of households receiving dividends, interest, or rental income +\item \strong{PctPubAsst (B19058)}: percent of households receiving public assistance +\item \strong{MedHomeVal (B25077)}: median home value (dollars) +\item \strong{PctMgmtBusScArti (C24060)}: percent in a management, business, science, or arts occupation +\item \strong{PctFemHeadKids (B11005)}: percent of households that are female headed with any children under 18 years +\item \strong{PctOwnerOcc (DP04)}: percent of housing units that are owner occupied +\item \strong{PctNoPhone (DP04)}: percent of households without a telephone +\item \strong{PctNComPlm (DP04)}: percent of households without complete plumbing facilities +\item \strong{PctEducHSPlus (S1501)}: percent with a high school degree or higher (population 25 years and over) +\item \strong{PctEducBchPlus (S1501)}: percent with a college degree or higher (population 25 years and over) +\item \strong{PctFamBelowPov (S1702)}: percent of families with incomes below the poverty level +\item \strong{PctUnempl (S2301)}: percent unemployed } -Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify the referent for standardizing the NDI (Powell-Wiley) values. For example, if all U.S. states are specified for the \code{state} argument, then the output would be a U.S.-standardized index. Please note: the NDI (Powell-Wiley) values will not exactly match (but will highly correlate with) those found in Andrews et al. (2020) \doi{10.1080/17445647.2020.1750066} and Slotman et al. 
(2022) \doi{10.1016/j.dib.2022.108002} because the two studies used a different statistical platform (i.e., SPSS and SAS, respectively) that intrinsically calculate the principal component analysis differently from R. +Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify the referent for standardizing the \emph{NDI} (Powell-Wiley) values. For example, if all U.S. states are specified for the \code{state} argument, then the output would be a U.S.-standardized index. Please note: the \emph{NDI} (Powell-Wiley) values will not exactly match (but will highly correlate with) those found in Andrews et al. (2020) \doi{10.1080/17445647.2020.1750066} and Slotman et al. (2022) \doi{10.1016/j.dib.2022.108002} because the two studies used a different statistical platform (i.e., SPSS and SAS, respectively) that intrinsically calculate the principal component analysis differently from R. -The categorical NDI (Powell-Wiley) values are population-weighted quintiles of the continuous NDI (Powell-Wiley) values. +The categorical \emph{NDI} (Powell-Wiley) values are population-weighted quintiles of the continuous \emph{NDI} (Powell-Wiley) values. Check if the proportion of variance explained by the first principal component is high (more than 0.5). @@ -76,16 +76,13 @@ powell_wiley(df = DCtracts2020[ , -c(3:10)]) \dontrun{ # Wrapped in \dontrun{} because these examples require a Census API key. 
- - # Tract-level metric (2020) - powell_wiley(geo = "tract", state = "GA", year = 2020) - - # Impute NDI for tracts (2020) with missing census information (median values) - powell_wiley(state = "tract", "GA", year = 2020, imp = TRUE) - - # County-level metric (2020) - powell_wiley(geo = "county", state = "GA", year = 2020) - + + # Tract-level NDI (Powell-Wiley; 2020) + powell_wiley(geo = 'tract', state = 'GA', year = 2020) + + # Impute NDI (Powell-Wiley; 2020) for tracts with missing census information (median values) + powell_wiley(geo = 'tract', state = 'GA', year = 2020, imp = TRUE) + } } diff --git a/man/sudano.Rd b/man/sudano.Rd new file mode 100644 index 0000000..4f0b1c3 --- /dev/null +++ b/man/sudano.Rd @@ -0,0 +1,96 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sudano.R +\name{sudano} +\alias{sudano} +\title{Location Quotient based on Merton (1938) and Sudano et al. (2013)} +\usage{ +sudano( + geo_large = "county", + geo_small = "tract", + year = 2020, + subgroup, + omit_NAs = TRUE, + quiet = FALSE, + ... +) +} +\arguments{ +\item{geo_large}{Character string specifying the larger geographical unit of the data. The default is counties \code{geo_large = 'county'}.} + +\item{geo_small}{Character string specifying the smaller geographical unit of the data. The default is census tracts \code{geo_small = 'tract'}.} + +\item{year}{Numeric. The year to compute the estimate. The default is 2020, and the years 2009 onward are currently available.} + +\item{subgroup}{Character string specifying the racial/ethnic subgroup(s). See Details for available choices.} + +\item{omit_NAs}{Logical. If FALSE, will compute index for a larger geographical unit only if all of its smaller geographical units have values. The default is TRUE.} + +\item{quiet}{Logical. If TRUE, will display messages about potential missing census information.
The default is FALSE.} + +\item{...}{Arguments passed to \code{\link[tidycensus]{get_acs}} to select state, county, and other arguments for census characteristics} +} +\value{ +An object of class 'list'. This is a named list with the following components: + +\describe{ +\item{\code{lq}}{An object of class 'tbl' for the GEOID, name, and \emph{LQ} at specified smaller census geographies.} +\item{\code{lq_data}}{An object of class 'tbl' for the raw census values at specified smaller census geographies.} +\item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute \emph{LQ}.} +} +} +\description{ +Compute the aspatial Location Quotient (Sudano) of a selected racial/ethnic subgroup(s) and U.S. geographies. +} +\details{ +This function will compute the aspatial Location Quotient (\emph{LQ}) of selected racial/ethnic subgroups and U.S. geographies for a specified geographical extent (e.g., the entire U.S. or a single state) based on Merton (1939) \doi{10.2307/2084686} and Sudano et al. (2013) \doi{10.1016/j.healthplace.2012.09.015}. This function provides the computation of \emph{LQ} for any of the U.S. Census Bureau race/ethnicity subgroups (including Hispanic and non-Hispanic individuals). + +The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the aspatial computation. The yearly estimates are available for 2009 onward when ACS-5 data are available (2010 onward for \code{geo_large = 'cbsa'} and 2011 onward for \code{geo_large = 'csa'} or \code{geo_large = 'metro'}) but may be available from other U.S. Census Bureau surveys. The twenty racial/ethnic subgroups (U.S. 
Census Bureau definitions) are: +\itemize{ +\item \strong{B03002_002}: not Hispanic or Latino \code{'NHoL'} +\item \strong{B03002_003}: not Hispanic or Latino, white alone \code{'NHoLW'} +\item \strong{B03002_004}: not Hispanic or Latino, Black or African American alone \code{'NHoLB'} +\item \strong{B03002_005}: not Hispanic or Latino, American Indian and Alaska Native alone \code{'NHoLAIAN'} +\item \strong{B03002_006}: not Hispanic or Latino, Asian alone \code{'NHoLA'} +\item \strong{B03002_007}: not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'NHoLNHOPI'} +\item \strong{B03002_008}: not Hispanic or Latino, Some other race alone \code{'NHoLSOR'} +\item \strong{B03002_009}: not Hispanic or Latino, Two or more races \code{'NHoLTOMR'} +\item \strong{B03002_010}: not Hispanic or Latino, Two races including Some other race \code{'NHoLTRiSOR'} +\item \strong{B03002_011}: not Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'NHoLTReSOR'} +\item \strong{B03002_012}: Hispanic or Latino \code{'HoL'} +\item \strong{B03002_013}: Hispanic or Latino, white alone \code{'HoLW'} +\item \strong{B03002_014}: Hispanic or Latino, Black or African American alone \code{'HoLB'} +\item \strong{B03002_015}: Hispanic or Latino, American Indian and Alaska Native alone \code{'HoLAIAN'} +\item \strong{B03002_016}: Hispanic or Latino, Asian alone \code{'HoLA'} +\item \strong{B03002_017}: Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'HoLNHOPI'} +\item \strong{B03002_018}: Hispanic or Latino, Some other race alone \code{'HoLSOR'} +\item \strong{B03002_019}: Hispanic or Latino, Two or more races \code{'HoLTOMR'} +\item \strong{B03002_020}: Hispanic or Latino, Two races including Some other race \code{'HoLTRiSOR'} +\item \strong{B03002_021}: Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'HoLTReSOR'} +} + +Use the internal \code{state} and \code{county} 
arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. + +\emph{LQ} is some measure of relative racial homogeneity of each smaller geography within a larger geography. \emph{LQ} can range in value from 0 to infinity because it is a ratio of two proportions in which the numerator is the proportion of subgroup population in a smaller geography and the denominator is the proportion of subgroup population in its larger geography. For example, a smaller geography with an \emph{LQ} of 5 means that the proportion of the subgroup population living in the smaller geography is five times the proportion of the subgroup population in its larger geography. + +Larger geographies available include state \code{geo_large = 'state'}, county \code{geo_large = 'county'}, census tract \code{geo_large = 'tract'}, Core Based Statistical Area \code{geo_large = 'cbsa'}, Combined Statistical Area \code{geo_large = 'csa'}, and Metropolitan Division \code{geo_large = 'metro'} levels. Smaller geographies available include county \code{geo_small = 'county'}, census tract \code{geo_small = 'tract'}, and census block group \code{geo_small = 'block group'} levels. If a larger geographical area is comprised of only one smaller geographical area (e.g., a U.S. county contains only one census tract), then the \emph{LQ} value returned is NA. If the larger geographical unit is Combined Statistical Areas \code{geo_large = 'csa'} or Core Based Statistical Areas \code{geo_large = 'cbsa'}, only the smaller geographical units completely within a larger geographical unit are considered in the \emph{LQ} computation (see internal \code{\link[sf]{st_within}} function for more information); we recommend specifying all states within which the larger geographical unit of interest is located using the internal \code{state} argument to ensure all appropriate smaller geographical units are included in the \emph{LQ} computation.
+} +\examples{ +\dontrun{ +# Wrapped in \dontrun{} because these examples require a Census API key. + + # Location Quotient (a measure of relative homogeneity) of Black populations + ## of census tracts within counties within Georgia, U.S.A., counties (2020) + sudano( + geo_large = 'county', + geo_small = 'tract', + state = 'GA', + year = 2020, + subgroup = c('NHoLB', 'HoLB') + ) + +} + +} +\seealso{ +\code{\link[tidycensus]{get_acs}} for additional arguments for geographic extent selection (i.e., \code{state} and \code{county}). +} diff --git a/man/white.Rd b/man/white.Rd new file mode 100644 index 0000000..d17d21e --- /dev/null +++ b/man/white.Rd @@ -0,0 +1,96 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/white.R +\name{white} +\alias{white} +\title{Correlation Ratio based on Bell (1954) and White (1986)} +\usage{ +white( + geo_large = "county", + geo_small = "tract", + year = 2020, + subgroup, + omit_NAs = TRUE, + quiet = FALSE, + ... +) +} +\arguments{ +\item{geo_large}{Character string specifying the larger geographical unit of the data. The default is counties \code{geo_large = 'county'}.} + +\item{geo_small}{Character string specifying the smaller geographical unit of the data. The default is census tracts \code{geo_small = 'tract'}.} + +\item{year}{Numeric. The year to compute the estimate. The default is 2020, and the years 2009 onward are currently available.} + +\item{subgroup}{Character string specifying the racial/ethnic subgroup(s). See Details for available choices.} + +\item{omit_NAs}{Logical. If FALSE, will compute index for a larger geographical unit only if all of its smaller geographical units have values. The default is TRUE.} + +\item{quiet}{Logical. If TRUE, will display messages about potential missing census information.
The default is FALSE.} + +\item{...}{Arguments passed to \code{\link[tidycensus]{get_acs}} to select state, county, and other arguments for census characteristics} +} +\value{ +An object of class 'list'. This is a named list with the following components: + +\describe{ +\item{\code{v}}{An object of class 'tbl' for the GEOID, name, and \emph{V} at specified larger census geographies.} +\item{\code{v_data}}{An object of class 'tbl' for the raw census values at specified smaller census geographies.} +\item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute \emph{V}.} +} +} +\description{ +Compute the aspatial Correlation Ratio (White) of a selected racial/ethnic subgroup(s) and U.S. geographies. +} +\details{ +This function will compute the aspatial Correlation Ratio (\emph{V} or \eqn{Eta^{2}}{Eta^2}) of selected racial/ethnic subgroups and U.S. geographies for a specified geographical extent (e.g., the entire U.S. or a single state) based on Bell (1954) \doi{10.2307/2574118} and White (1986) \doi{10.2307/3644339}. This function provides the computation of \emph{V} for any of the U.S. Census Bureau race/ethnicity subgroups (including Hispanic and non-Hispanic individuals). + +The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the aspatial computation. The yearly estimates are available for 2009 onward when ACS-5 data are available (2010 onward for \code{geo_large = 'cbsa'} and 2011 onward for \code{geo_large = 'csa'} or \code{geo_large = 'metro'}) but may be available from other U.S. Census Bureau surveys. The twenty racial/ethnic subgroups (U.S. 
Census Bureau definitions) are: +\itemize{ +\item \strong{B03002_002}: not Hispanic or Latino \code{'NHoL'} +\item \strong{B03002_003}: not Hispanic or Latino, white alone \code{'NHoLW'} +\item \strong{B03002_004}: not Hispanic or Latino, Black or African American alone \code{'NHoLB'} +\item \strong{B03002_005}: not Hispanic or Latino, American Indian and Alaska Native alone \code{'NHoLAIAN'} +\item \strong{B03002_006}: not Hispanic or Latino, Asian alone \code{'NHoLA'} +\item \strong{B03002_007}: not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'NHoLNHOPI'} +\item \strong{B03002_008}: not Hispanic or Latino, Some other race alone \code{'NHoLSOR'} +\item \strong{B03002_009}: not Hispanic or Latino, Two or more races \code{'NHoLTOMR'} +\item \strong{B03002_010}: not Hispanic or Latino, Two races including Some other race \code{'NHoLTRiSOR'} +\item \strong{B03002_011}: not Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'NHoLTReSOR'} +\item \strong{B03002_012}: Hispanic or Latino \code{'HoL'} +\item \strong{B03002_013}: Hispanic or Latino, white alone \code{'HoLW'} +\item \strong{B03002_014}: Hispanic or Latino, Black or African American alone \code{'HoLB'} +\item \strong{B03002_015}: Hispanic or Latino, American Indian and Alaska Native alone \code{'HoLAIAN'} +\item \strong{B03002_016}: Hispanic or Latino, Asian alone \code{'HoLA'} +\item \strong{B03002_017}: Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'HoLNHOPI'} +\item \strong{B03002_018}: Hispanic or Latino, Some other race alone \code{'HoLSOR'} +\item \strong{B03002_019}: Hispanic or Latino, Two or more races \code{'HoLTOMR'} +\item \strong{B03002_020}: Hispanic or Latino, Two races including Some other race \code{'HoLTRiSOR'} +\item \strong{B03002_021}: Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'HoLTReSOR'} +} + +Use the internal \code{state} and \code{county} 
arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. + +\emph{V} removes the asymmetry from the Isolation Index (Bell) by controlling for the effect of population composition. The Isolation Index (Bell) is some measure of the probability that a member of one subgroup(s) will meet or interact with a member of another subgroup(s) with higher values signifying higher probability of interaction (less isolation). \emph{V} can range in value from 0 to Inf. + +Larger geographies available include state \code{geo_large = 'state'}, county \code{geo_large = 'county'}, census tract \code{geo_large = 'tract'}, Core Based Statistical Area \code{geo_large = 'cbsa'}, Combined Statistical Area \code{geo_large = 'csa'}, and Metropolitan Division \code{geo_large = 'metro'} levels. Smaller geographies available include county \code{geo_small = 'county'}, census tract \code{geo_small = 'tract'}, and census block group \code{geo_small = 'block group'} levels. If a larger geographical area is comprised of only one smaller geographical area (e.g., a U.S. county contains only one census tract), then the \emph{V} value returned is NA. If the larger geographical unit is Combined Statistical Areas \code{geo_large = 'csa'} or Core Based Statistical Areas \code{geo_large = 'cbsa'}, only the smaller geographical units completely within a larger geographical unit are considered in the \emph{V} computation (see internal \code{\link[sf]{st_within}} function for more information); we recommend specifying all states within which the larger geographical unit of interest is located using the internal \code{state} argument to ensure all appropriate smaller geographical units are included in the \emph{V} computation.
+ + # Correlation Ratio (a measure of isolation) of Black populations + ## of census tracts within counties within Georgia, U.S.A. (2020) + white( + geo_large = 'county', + geo_small = 'tract', + state = 'GA', + year = 2020, + subgroup = c('NHoLB', 'HoLB') + ) + +} + +} +\seealso{ +\code{\link[tidycensus]{get_acs}} for additional arguments for geographic extent selection (i.e., \code{state} and \code{county}). +} diff --git a/man/white_blau.Rd b/man/white_blau.Rd new file mode 100644 index 0000000..6fd55c9 --- /dev/null +++ b/man/white_blau.Rd @@ -0,0 +1,102 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/white_blau.R +\name{white_blau} +\alias{white_blau} +\title{An index of spatial proximity based on White (1986) and Blau (1977)} +\usage{ +white_blau( + geo_large = "county", + geo_small = "tract", + year = 2020, + subgroup, + subgroup_ref, + omit_NAs = TRUE, + quiet = FALSE, + ... +) +} +\arguments{ +\item{geo_large}{Character string specifying the larger geographical unit of the data. The default is counties \code{geo_large = 'county'}.} + +\item{geo_small}{Character string specifying the smaller geographical unit of the data. The default is census tracts \code{geo_small = 'tract'}.} + +\item{year}{Numeric. The year to compute the estimate. The default is 2020, and the years 2009 onward are currently available.} + +\item{subgroup}{Character string specifying the racial/ethnic subgroup(s) as the comparison population. See Details for available choices.} + +\item{subgroup_ref}{Character string specifying the racial/ethnic subgroup(s) as the reference population. See Details for available choices.} + +\item{omit_NAs}{Logical. If FALSE, will compute index for a larger geographical unit only if all of its smaller geographical units have values. The default is TRUE.} + +\item{quiet}{Logical. If TRUE, will not display messages about potential missing census information. 
The default is FALSE.} + +\item{...}{Arguments passed to \code{\link[tidycensus]{get_acs}} to select state, county, and other arguments for census characteristics} +} +\value{ +An object of class 'list'. This is a named list with the following components: + +\describe{ +\item{\code{sp}}{An object of class 'tbl' for the GEOID, name, and \emph{SP} at specified larger census geographies.} +\item{\code{sp_data}}{An object of class 'tbl' for the raw census values at specified smaller census geographies.} +\item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute \emph{SP}.} +} +} +\description{ +Compute an index of spatial proximity (White) of a selected racial/ethnic subgroup(s) and U.S. geographies. +} +\details{ +This function will compute an index of spatial proximity (\emph{SP}) of selected racial/ethnic subgroups and U.S. geographies for a specified geographical extent (e.g., the entire U.S. or a single state) based on White (1986) \doi{10.2307/3644339} and Blau (1977; ISBN-13:978-0-029-03660-0). This function provides the computation of \emph{SP} for any of the U.S. Census Bureau race/ethnicity subgroups (including Hispanic and non-Hispanic individuals). + +The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the computation. The yearly estimates are available for 2009 onward when ACS-5 data are available (2010 onward for \code{geo_large = 'cbsa'} and 2011 onward for \code{geo_large = 'csa'} or \code{geo_large = 'metro'}) but may be available from other U.S. Census Bureau surveys. The twenty racial/ethnic subgroups (U.S. 
Census Bureau definitions) are: +\itemize{ +\item \strong{B03002_002}: not Hispanic or Latino \code{'NHoL'} +\item \strong{B03002_003}: not Hispanic or Latino, white alone \code{'NHoLW'} +\item \strong{B03002_004}: not Hispanic or Latino, Black or African American alone \code{'NHoLB'} +\item \strong{B03002_005}: not Hispanic or Latino, American Indian and Alaska Native alone \code{'NHoLAIAN'} +\item \strong{B03002_006}: not Hispanic or Latino, Asian alone \code{'NHoLA'} +\item \strong{B03002_007}: not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'NHoLNHOPI'} +\item \strong{B03002_008}: not Hispanic or Latino, Some other race alone \code{'NHoLSOR'} +\item \strong{B03002_009}: not Hispanic or Latino, Two or more races \code{'NHoLTOMR'} +\item \strong{B03002_010}: not Hispanic or Latino, Two races including Some other race \code{'NHoLTRiSOR'} +\item \strong{B03002_011}: not Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'NHoLTReSOR'} +\item \strong{B03002_012}: Hispanic or Latino \code{'HoL'} +\item \strong{B03002_013}: Hispanic or Latino, white alone \code{'HoLW'} +\item \strong{B03002_014}: Hispanic or Latino, Black or African American alone \code{'HoLB'} +\item \strong{B03002_015}: Hispanic or Latino, American Indian and Alaska Native alone \code{'HoLAIAN'} +\item \strong{B03002_016}: Hispanic or Latino, Asian alone \code{'HoLA'} +\item \strong{B03002_017}: Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'HoLNHOPI'} +\item \strong{B03002_018}: Hispanic or Latino, Some other race alone \code{'HoLSOR'} +\item \strong{B03002_019}: Hispanic or Latino, Two or more races \code{'HoLTOMR'} +\item \strong{B03002_020}: Hispanic or Latino, Two races including Some other race \code{'HoLTRiSOR'} +\item \strong{B03002_021}: Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'HoLTReSOR'} +} + +Use the internal \code{state} and \code{county} 
arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. + +\emph{SP} is a measure of clustering of racial/ethnic populations within smaller geographical areas that are located within larger geographical areas. \emph{SP} can range in value from 0 to Inf and represents the degree to which an area is a racial or ethnic enclave. A value of 1 indicates there is no differential clustering between subgroup and referent group members. A value greater than 1 indicates subgroup members live nearer to one another than to referent subgroup members. A value less than 1 indicates subgroup members live nearer to referent subgroup members than to their own subgroup members. + +The metric uses the exponential transform of a distance matrix (kilometers) between smaller geographical area centroids, with a diagonal defined as \code{(0.6*a_{i})^{0.5}} where \code{a_{i}} is the area (square kilometers) of smaller geographical unit \code{i} as defined by White (1983) \doi{10.1086/227768}. + +Larger geographies available include state \code{geo_large = 'state'}, county \code{geo_large = 'county'}, census tract \code{geo_large = 'tract'}, Core Based Statistical Area \code{geo_large = 'cbsa'}, Combined Statistical Area \code{geo_large = 'csa'}, and Metropolitan Division \code{geo_large = 'metro'} levels. Smaller geographies available include county \code{geo_small = 'county'}, census tract \code{geo_small = 'tract'}, and census block group \code{geo_small = 'block group'} levels. If a larger geographical area is comprised of only one smaller geographical area (e.g., a U.S. county contains only one census tract), then the \emph{SP} value returned is NA. 
If the larger geographical unit is Combined Statistical Areas \code{geo_large = 'csa'} or Core Based Statistical Areas \code{geo_large = 'cbsa'}, only the smaller geographical units completely within a larger geographical unit are considered in the \emph{SP} computation (see internal \code{\link[sf]{st_within}} function for more information) and we recommend specifying all states within which the larger geographical unit of interest is located using the internal \code{state} argument to ensure all appropriate smaller geographical units are included in the \emph{SP} computation. +} +\examples{ +\dontrun{ +# Wrapped in \dontrun{} because these examples require a Census API key. + + # Index of spatial proximity of non-Hispanic Black vs. non-Hispanic white populations + ## of census tracts within counties within Georgia, U.S.A. (2020) + white_blau( + geo_large = 'county', + geo_small = 'tract', + state = 'GA', + year = 2020, + subgroup = 'NHoLB', + subgroup_ref = 'NHoLW' + ) + +} + +} +\seealso{ +\code{\link[tidycensus]{get_acs}} for additional arguments for geographic extent selection (i.e., \code{state} and \code{county}). 
+} diff --git a/tests/testthat.R b/tests/testthat.R index e78b8e8..40f13cf 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,4 +1,4 @@ library(testthat) library(ndi) -test_check("ndi") +test_check('ndi') diff --git a/tests/testthat/test-anthopolos.R b/tests/testthat/test-anthopolos.R index 5b368fb..41ba7ac 100644 --- a/tests/testthat/test-anthopolos.R +++ b/tests/testthat/test-anthopolos.R @@ -1,37 +1,68 @@ -context("anthopolos") +context('anthopolos') -####################### +# ------------------- # # anthopolos testthat # -####################### +# ------------------- # -test_that("anthopolos throws error with invalid arguments", { - +test_that('anthopolos throws error with invalid arguments', { # Unavailable geography - expect_error(anthopolos(geo = "zcta", state = "DC", year = 2020, subgroup = "NHoLB", quiet = TRUE)) + expect_error(anthopolos( + geo = 'zcta', + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + )) # Unavailable year - expect_error(anthopolos(state = "DC", year = 2005, subgroup = "NHoLB", quiet = TRUE)) + expect_error(anthopolos( + state = 'DC', + year = 2005, + subgroup = 'NHoLB', + quiet = TRUE + )) # Unavailable subgroup - expect_error(anthopolos(state = "DC", year = 2020, subgroup = "terran", quiet = TRUE)) + expect_error(anthopolos( + state = 'DC', + year = 2020, + subgroup = 'terran', + quiet = TRUE + )) - skip_if(Sys.getenv("CENSUS_API_KEY") == "") + skip_if(Sys.getenv('CENSUS_API_KEY') == '') # Incorrect state - expect_error(anthopolos(state = "AB", year = 2020, subgroup = "NHoLB", quiet = TRUE)) + expect_error(anthopolos( + state = 'AB', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + )) -} -) +}) -test_that("anthopolos works", { - - skip_if(Sys.getenv("CENSUS_API_KEY") == "") +test_that('anthopolos works', { + skip_if(Sys.getenv('CENSUS_API_KEY') == '') - expect_output(anthopolos(state = "DC", year = 2020, subgroup = c("NHoLB", "HoLB"))) + expect_output(anthopolos( + state = 'DC', + year = 2020, + 
subgroup = c('NHoLB', 'HoLB') + )) - expect_silent(anthopolos(state = "DC", year = 2020, subgroup = "NHoLB", quiet = TRUE)) + expect_silent(anthopolos( + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + )) - expect_silent(anthopolos(state = "DC", year = 2020, subgroup = c("NHoLB", "HoLB"), quiet = TRUE)) + expect_silent(anthopolos( + state = 'DC', + year = 2020, + subgroup = c('NHoLB', 'HoLB'), + quiet = TRUE + )) -} -) +}) diff --git a/tests/testthat/test-atkinson.R b/tests/testthat/test-atkinson.R index 0695241..419c0a0 100644 --- a/tests/testthat/test-atkinson.R +++ b/tests/testthat/test-atkinson.R @@ -1,50 +1,86 @@ -context("atkinson") +context('atkinson') -##################### +# ----------------- # # atkinson testthat # -##################### +# ----------------- # -test_that("atkinson throws error with invalid arguments", { - +test_that('atkinson throws error with invalid arguments', { # Unavailable geography - expect_error(atkinson(geo_small = "zcta", state = "DC", year = 2020, - subgroup = "NHoLB", quiet = TRUE)) - expect_error(atkinson(geo_large = "block group", state = "DC", year = 2020, - subgroup = "NHoLB", quiet = TRUE)) + expect_error(atkinson( + geo_small = 'zcta', + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + )) + expect_error( + atkinson( + geo_large = 'block group', + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + ) + ) # Unavailable year - expect_error(atkinson(state = "DC", year = 2005, - subgroup = "NHoLB", quiet = TRUE)) + expect_error(atkinson( + state = 'DC', + year = 2005, + subgroup = 'NHoLB', + quiet = TRUE + )) # Unavailable subgroup - expect_error(atkinson(state = "DC", year = 2020, - subgroup = "terran", quiet = TRUE)) + expect_error(atkinson( + state = 'DC', + year = 2020, + subgroup = 'terran', + quiet = TRUE + )) # Incorrect epsilon - expect_error(atkinson(state = "DC", year = 2020, - subgroup = "NHoLB", epsilon = 2, quiet = TRUE)) + expect_error(atkinson( + state = 
'DC', + year = 2020, + subgroup = 'NHoLB', + epsilon = 2, + quiet = TRUE + )) - skip_if(Sys.getenv("CENSUS_API_KEY") == "") + skip_if(Sys.getenv('CENSUS_API_KEY') == '') # Incorrect state - expect_error(atkinson(state = "AB", year = 2020, - subgroup = "NHoLB", quiet = TRUE)) + expect_error(atkinson( + state = 'AB', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + )) -} -) +}) -test_that("atkinson works", { - - skip_if(Sys.getenv("CENSUS_API_KEY") == "") +test_that('atkinson works', { + skip_if(Sys.getenv('CENSUS_API_KEY') == '') - expect_silent(atkinson(state = "DC", year = 2020, - subgroup = c("NHoLB", "HoLB"))) + expect_silent(atkinson( + state = 'DC', + year = 2020, + subgroup = c('NHoLB', 'HoLB') + )) - expect_silent(atkinson(state = "DC", year = 2020, - subgroup = "NHoLB", quiet = TRUE)) + expect_silent(atkinson( + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + )) - expect_silent(atkinson(state = "DC", year = 2020, - subgroup = c("NHoLB", "HoLB"), quiet = TRUE)) + expect_silent(atkinson( + state = 'DC', + year = 2020, + subgroup = c('NHoLB', 'HoLB'), + quiet = TRUE + )) -} -) +}) diff --git a/tests/testthat/test-bell.R b/tests/testthat/test-bell.R new file mode 100644 index 0000000..f7bc18f --- /dev/null +++ b/tests/testthat/test-bell.R @@ -0,0 +1,94 @@ +context('bell') + +# ------------- # +# bell testthat # +# ------------- # + +test_that('bell throws error with invalid arguments', { + # Unavailable geography + expect_error( + bell( + geo_small = 'zcta', + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + subgroup_ixn = 'NHoLW', + quiet = TRUE + ) + ) + expect_error( + bell( + geo_large = 'block group', + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + subgroup_ixn = 'NHoLW', + quiet = TRUE + ) + ) + + # Unavailable year + expect_error(bell( + state = 'DC', + year = 2005, + subgroup = 'NHoLB', + subgroup_ixn = 'NHoLW', + quiet = TRUE + )) + + # Unavailable subgroup + expect_error(bell( + state = 'DC', + year = 2020, + 
subgroup = 'terran', + subgroup_ixn = 'NHoLW', + quiet = TRUE + )) + expect_error(bell( + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + subgroup_ixn = 'terran', + quiet = TRUE + )) + + skip_if(Sys.getenv('CENSUS_API_KEY') == '') + + # Incorrect state + expect_error(bell( + state = 'AB', + year = 2020, + subgroup = 'NHoLB', + subgroup_ixn = 'NHoLW', + quiet = TRUE + )) + +}) + +test_that('bell works', { + skip_if(Sys.getenv('CENSUS_API_KEY') == '') + + expect_silent(bell( + state = 'DC', + year = 2020, + subgroup = c('NHoLB', 'HoLB'), + subgroup_ixn = c('NHoLW', 'HoLW') + )) + + expect_silent(bell( + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + subgroup_ixn = 'NHoLW', + quiet = TRUE + )) + + expect_silent(bell( + state = 'DC', + year = 2020, + subgroup = c('NHoLB', 'HoLB'), + subgroup_ixn = c('NHoLW', 'HoLW'), + quiet = TRUE + )) + +}) diff --git a/tests/testthat/test-bemanian_beyer.R b/tests/testthat/test-bemanian_beyer.R new file mode 100644 index 0000000..2cd0509 --- /dev/null +++ b/tests/testthat/test-bemanian_beyer.R @@ -0,0 +1,104 @@ +context('bemanian_beyer') + +# ----------------------- # +# bemanian_beyer testthat # +# ----------------------- # + +test_that('bemanian_beyer throws error with invalid arguments', { + # Unavailable geography + expect_error( + bemanian_beyer( + geo_small = 'zcta', + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + subgroup_ixn = 'NHoLW', + quiet = TRUE + ) + ) + expect_error( + bemanian_beyer( + geo_large = 'block group', + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + subgroup_ixn = 'NHoLW', + quiet = TRUE + ) + ) + + # Unavailable year + expect_error( + bemanian_beyer( + state = 'DC', + year = 2005, + subgroup = 'NHoLB', + subgroup_ixn = 'NHoLW', + quiet = TRUE + ) + ) + + # Unavailable subgroup + expect_error( + bemanian_beyer( + state = 'DC', + year = 2020, + subgroup = 'terran', + subgroup_ixn = 'NHoLW', + quiet = TRUE + ) + ) + expect_error( + bemanian_beyer( + state = 'DC', + year = 2020, + 
subgroup = 'NHoLB', + subgroup_ixn = 'terran', + quiet = TRUE + ) + ) + + skip_if(Sys.getenv('CENSUS_API_KEY') == '') + + # Incorrect state + expect_error( + bemanian_beyer( + state = 'AB', + year = 2020, + subgroup = 'NHoLB', + subgroup_ixn = 'NHoLW', + quiet = TRUE + ) + ) + +}) + +test_that('bemanian_beyer works', { + skip_if(Sys.getenv('CENSUS_API_KEY') == '') + + expect_warning(bemanian_beyer( + state = 'DC', + year = 2020, + subgroup = c('NHoLB', 'HoLB'), + subgroup_ixn = c('NHoLW', 'HoLW') + )) + + expect_warning( + bemanian_beyer( + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + subgroup_ixn = 'NHoLW', + quiet = TRUE + ) + ) + + expect_warning(bemanian_beyer( + state = 'DC', + year = 2020, + subgroup = c('NHoLB', 'HoLB'), + subgroup_ixn = c('NHoLW', 'HoLW'), + quiet = TRUE + )) + +}) diff --git a/tests/testthat/test-bravo.R b/tests/testthat/test-bravo.R index 662e1d0..60efbe0 100644 --- a/tests/testthat/test-bravo.R +++ b/tests/testthat/test-bravo.R @@ -1,37 +1,68 @@ -context("bravo") +context('bravo') -################## +# -------------- # # bravo testthat # -################## +# -------------- # -test_that("bravo throws error with invalid arguments", { - +test_that('bravo throws error with invalid arguments', { # Unavailable geography - expect_error(bravo(geo = "zcta", state = "DC", year = 2020, subgroup = "LtHS", quiet = TRUE)) + expect_error(bravo( + geo = 'zcta', + state = 'DC', + year = 2020, + subgroup = 'LtHS', + quiet = TRUE + )) # Unavailable year - expect_error(bravo(state = "DC", year = 2005, subgroup = "LtHS", quiet = TRUE)) + expect_error(bravo( + state = 'DC', + year = 2005, + subgroup = 'LtHS', + quiet = TRUE + )) # Unavailable subgroup - expect_error(bravo(state = "DC", year = 2020, subgroup = "terran", quiet = TRUE)) + expect_error(bravo( + state = 'DC', + year = 2020, + subgroup = 'terran', + quiet = TRUE + )) - skip_if(Sys.getenv("CENSUS_API_KEY") == "") + skip_if(Sys.getenv('CENSUS_API_KEY') == '') # Incorrect state - 
expect_error(bravo(state = "AB", year = 2020, subgroup = "LtHS", quiet = TRUE)) + expect_error(bravo( + state = 'AB', + year = 2020, + subgroup = 'LtHS', + quiet = TRUE + )) -} -) +}) -test_that("bravo works", { - - skip_if(Sys.getenv("CENSUS_API_KEY") == "") +test_that('bravo works', { + skip_if(Sys.getenv('CENSUS_API_KEY') == '') - expect_output(bravo(state = "DC", year = 2009, subgroup = c("LtHS", "HSGiE"))) + expect_output(bravo( + state = 'DC', + year = 2009, + subgroup = c('LtHS', 'HSGiE') + )) - expect_silent(bravo(state = "DC", year = 2020, subgroup = "LtHS", quiet = TRUE)) + expect_silent(bravo( + state = 'DC', + year = 2020, + subgroup = 'LtHS', + quiet = TRUE + )) - expect_silent(bravo(state = "DC", year = 2020, subgroup = c("LtHS", "HSGiE"), quiet = TRUE)) + expect_silent(bravo( + state = 'DC', + year = 2020, + subgroup = c('LtHS', 'HSGiE'), + quiet = TRUE + )) -} -) +}) diff --git a/tests/testthat/test-duncan.R b/tests/testthat/test-duncan.R index d895140..da49e0e 100644 --- a/tests/testthat/test-duncan.R +++ b/tests/testthat/test-duncan.R @@ -1,48 +1,104 @@ -context("duncan") +context('duncan') -################### +# --------------- # # duncan testthat # -################### +# --------------- # -test_that("duncan throws error with invalid arguments", { - +test_that('duncan throws error with invalid arguments', { # Unavailable geography - expect_error(duncan(geo_small = "zcta", state = "DC", year = 2020, - subgroup = "NHoLB", subgroup_ref = "NHoLW", quiet = TRUE)) - expect_error(duncan(geo_large = "block group", state = "DC", year = 2020, - subgroup = "NHoLB", subgroup_ref = "NHoLW", quiet = TRUE)) + expect_error( + duncan( + geo_small = 'zcta', + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + subgroup_ref = 'NHoLW', + quiet = TRUE + ) + ) + expect_error( + duncan( + geo_large = 'block group', + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + subgroup_ref = 'NHoLW', + quiet = TRUE + ) + ) # Unavailable year - expect_error(duncan(state = 
"DC", year = 2005, - subgroup = "NHoLB", subgroup_ref = "NHoLW", quiet = TRUE)) + expect_error( + duncan( + state = 'DC', + year = 2005, + subgroup = 'NHoLB', + subgroup_ref = 'NHoLW', + quiet = TRUE + ) + ) # Unavailable subgroup - expect_error(duncan(state = "DC", year = 2020, - subgroup = "terran", subgroup_ref = "NHoLW", quiet = TRUE)) - expect_error(duncan(state = "DC", year = 2020, - subgroup = "NHoLB", subgroup_ref = "terran", quiet = TRUE)) + expect_error( + duncan( + state = 'DC', + year = 2020, + subgroup = 'terran', + subgroup_ref = 'NHoLW', + quiet = TRUE + ) + ) + expect_error( + duncan( + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + subgroup_ref = 'terran', + quiet = TRUE + ) + ) - skip_if(Sys.getenv("CENSUS_API_KEY") == "") + skip_if(Sys.getenv('CENSUS_API_KEY') == '') # Incorrect state - expect_error(duncan(state = "AB", year = 2020, - subgroup = "NHoLB", subgroup_ref = "NHoLW", quiet = TRUE)) + expect_error( + duncan( + state = 'AB', + year = 2020, + subgroup = 'NHoLB', + subgroup_ref = 'NHoLW', + quiet = TRUE + ) + ) -} -) +}) -test_that("duncan works", { - - skip_if(Sys.getenv("CENSUS_API_KEY") == "") +test_that('duncan works', { + skip_if(Sys.getenv('CENSUS_API_KEY') == '') - expect_silent(duncan(state = "DC", year = 2020, - subgroup = c("NHoLB", "HoLB"), subgroup_ref = c("NHoLW", "HoLW"))) + expect_silent(duncan( + state = 'DC', + year = 2020, + subgroup = c('NHoLB', 'HoLB'), + subgroup_ref = c('NHoLW', 'HoLW') + )) - expect_silent(duncan(state = "DC", year = 2020, - subgroup = "NHoLB", subgroup_ref = "NHoLW", quiet = TRUE)) + expect_silent( + duncan( + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + subgroup_ref = 'NHoLW', + quiet = TRUE + ) + ) - expect_silent(duncan(state = "DC", year = 2020, - subgroup = c("NHoLB", "HoLB"), subgroup_ref = c("NHoLW", "HoLW"), quiet = TRUE)) + expect_silent(duncan( + state = 'DC', + year = 2020, + subgroup = c('NHoLB', 'HoLB'), + subgroup_ref = c('NHoLW', 'HoLW'), + quiet = TRUE + )) -} -) +}) 
diff --git a/tests/testthat/test-gini.R b/tests/testthat/test-gini.R index 957d982..e4b7b23 100644 --- a/tests/testthat/test-gini.R +++ b/tests/testthat/test-gini.R @@ -1,35 +1,49 @@ -context("gini") +context('gini') -################# +# ------------- # # gini testthat # -################# +# ------------- # -test_that("gini throws error with invalid arguments", { - +test_that('gini throws error with invalid arguments', { # Unavailable geography - expect_error(gini(geo = "zcta", state = "DC", year = 2020, quiet = TRUE)) + expect_error(gini( + geo = 'zcta', + state = 'DC', + year = 2020, + quiet = TRUE + )) # Unavailable year - expect_error(gini(state = "DC", year = 2005, quiet = TRUE)) + expect_error(gini( + state = 'DC', + year = 2005, + quiet = TRUE + )) - skip_if(Sys.getenv("CENSUS_API_KEY") == "") + skip_if(Sys.getenv('CENSUS_API_KEY') == '') # Incorrect state - expect_error(gini(state = "AB", year = 2020)) + expect_error(gini(state = 'AB', year = 2020)) # Unavailable geography for DC (only 1 'county' in DC so, alone, NDI cannot be computed) - expect_error(gini(geo = "county", state = "DC", year = 2009, quiet = TRUE)) - -} -) + expect_error(gini( + geo = 'county', + state = 'DC', + year = 2009, + quiet = TRUE + )) + +}) -test_that("gini works", { - - skip_if(Sys.getenv("CENSUS_API_KEY") == "") +test_that('gini works', { + skip_if(Sys.getenv('CENSUS_API_KEY') == '') - expect_message(gini(state = "DC", year = 2020)) + expect_message(gini(state = 'DC', year = 2020)) - expect_silent(gini(state = "DC", year = 2020, quiet = TRUE)) + expect_silent(gini( + state = 'DC', + year = 2020, + quiet = TRUE + )) -} -) +}) diff --git a/tests/testthat/test-hoover.R b/tests/testthat/test-hoover.R new file mode 100644 index 0000000..994f7d2 --- /dev/null +++ b/tests/testthat/test-hoover.R @@ -0,0 +1,77 @@ +context('hoover') + +# --------------- # +# hoover testthat # +# --------------- # + +test_that('hoover throws error with invalid arguments', { + # Unavailable geography + 
expect_error(hoover( + geo_small = 'zcta', + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + )) + expect_error( + hoover( + geo_large = 'block group', + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + ) + ) + + # Unavailable year + expect_error(hoover( + state = 'DC', + year = 2005, + subgroup = 'NHoLB', + quiet = TRUE + )) + + # Unavailable subgroup + expect_error(hoover( + state = 'DC', + year = 2020, + subgroup = 'terran', + quiet = TRUE + )) + + skip_if(Sys.getenv('CENSUS_API_KEY') == '') + + # Incorrect state + expect_error(hoover( + state = 'AB', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + )) + +}) + +test_that('hoover works', { + skip_if(Sys.getenv('CENSUS_API_KEY') == '') + + expect_silent(hoover( + state = 'DC', + year = 2020, + subgroup = c('NHoLB', 'HoLB') + )) + + expect_silent(hoover( + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + )) + + expect_silent(hoover( + state = 'DC', + year = 2020, + subgroup = c('NHoLB', 'HoLB'), + quiet = TRUE + )) + +}) diff --git a/tests/testthat/test-krieger.R b/tests/testthat/test-krieger.R index c2727e9..5d20b63 100644 --- a/tests/testthat/test-krieger.R +++ b/tests/testthat/test-krieger.R @@ -1,32 +1,45 @@ -context("krieger") +context('krieger') -#################### +# ---------------- # # krieger testthat # -#################### +# ---------------- # -test_that(" throws error with invalid arguments", { - +test_that(' throws error with invalid arguments', { # Unavailable geography - expect_error(krieger(geo = "zcta", state = "DC", year = 2020, quiet = TRUE)) + expect_error(krieger( + geo = 'zcta', + state = 'DC', + year = 2020, + quiet = TRUE + )) # Unavailable year - expect_error(krieger(state = "DC", year = 2005, quiet = TRUE)) + expect_error(krieger( + state = 'DC', + year = 2005, + quiet = TRUE + )) - skip_if(Sys.getenv("CENSUS_API_KEY") == "") + skip_if(Sys.getenv('CENSUS_API_KEY') == '') # Incorrect state - expect_error(krieger(state = "AB", 
year = 2020, quiet = TRUE)) + expect_error(krieger( + state = 'AB', + year = 2020, + quiet = TRUE + )) -} -) +}) -test_that("krieger works", { - - skip_if(Sys.getenv("CENSUS_API_KEY") == "") +test_that('krieger works', { + skip_if(Sys.getenv('CENSUS_API_KEY') == '') - expect_silent(krieger(state = "DC", year = 2020)) + expect_silent(krieger(state = 'DC', year = 2020)) - expect_silent(krieger(state = "DC", year = 2020, quiet = TRUE)) + expect_silent(krieger( + state = 'DC', + year = 2020, + quiet = TRUE + )) -} -) +}) diff --git a/tests/testthat/test-lieberson.R b/tests/testthat/test-lieberson.R new file mode 100644 index 0000000..207feea --- /dev/null +++ b/tests/testthat/test-lieberson.R @@ -0,0 +1,79 @@ +context('lieberson') + +# ------------------ # +# lieberson testthat # +# ------------------ # + +test_that('lieberson throws error with invalid arguments', { + # Unavailable geography + expect_error( + lieberson( + geo_small = 'zcta', + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + ) + ) + expect_error( + lieberson( + geo_large = 'block group', + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + ) + ) + + # Unavailable year + expect_error(lieberson( + state = 'DC', + year = 2005, + subgroup = 'NHoLB', + quiet = TRUE + )) + + # Unavailable subgroup + expect_error(lieberson( + state = 'DC', + year = 2020, + subgroup = 'terran', + quiet = TRUE + )) + + skip_if(Sys.getenv('CENSUS_API_KEY') == '') + + # Incorrect state + expect_error(lieberson( + state = 'AB', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + )) + +}) + +test_that('lieberson works', { + skip_if(Sys.getenv('CENSUS_API_KEY') == '') + + expect_silent(lieberson( + state = 'DC', + year = 2020, + subgroup = c('NHoLB', 'HoLB') + )) + + expect_silent(lieberson( + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + )) + + expect_silent(lieberson( + state = 'DC', + year = 2020, + subgroup = c('NHoLB', 'HoLB'), + quiet = TRUE + )) + +}) diff --git 
a/tests/testthat/test-messer.R b/tests/testthat/test-messer.R index 2193965..698100a 100644 --- a/tests/testthat/test-messer.R +++ b/tests/testthat/test-messer.R @@ -1,46 +1,77 @@ -context("messer") +context('messer') -################### +# --------------- # # messer testthat # -################### +# --------------- # -test_that("messer throws error with invalid arguments", { - +test_that('messer throws error with invalid arguments', { # Not a data.frame or tibble for `df` - expect_error(messer(df = c("a", "b", "c"))) + expect_error(messer(df = c('a', 'b', 'c'))) # Unavailable geography - expect_error(messer(geo = "zcta", state = "DC", year = 2020, quiet = TRUE)) + expect_error(messer( + geo = 'zcta', + state = 'DC', + year = 2020, + quiet = TRUE + )) # Unavailable year - expect_error(messer(state = "DC", year = 2005, quiet = TRUE)) + expect_error(messer( + state = 'DC', + year = 2005, + quiet = TRUE + )) - skip_if(Sys.getenv("CENSUS_API_KEY") == "") + skip_if(Sys.getenv('CENSUS_API_KEY') == '') # Incorrect state - expect_error(messer(state = "AB", year = 2020, quiet = TRUE)) + expect_error(messer( + state = 'AB', + year = 2020, + quiet = TRUE + )) # Unavailable geography for DC (only 1 'county' in DC so, alone, NDI cannot be computed) - expect_error(messer(geo = "county", state = "DC", year = 2009, quiet = TRUE)) + expect_error(messer( + geo = 'county', + state = 'DC', + year = 2009, + quiet = TRUE + )) -} -) +}) -test_that("messer works", { +test_that('messer works', { + expect_message(messer(df = DCtracts2020[,-c(2, 11:ncol(DCtracts2020))])) - expect_message(messer(df = DCtracts2020[, -c(2, 11:ncol(DCtracts2020))])) + skip_if(Sys.getenv('CENSUS_API_KEY') == '') - skip_if(Sys.getenv("CENSUS_API_KEY") == "") + expect_message(messer(state = 'DC', year = 2020)) - expect_message(messer(state = "DC", year = 2020)) - - expect_message(messer(state = "DC", year = 2020, round_output = TRUE)) + expect_message(messer( + state = 'DC', + year = 2020, + round_output = TRUE + 
)) - expect_message(messer(state = "DC", year = 2020, imp = TRUE)) + expect_message(messer( + state = 'DC', + year = 2020, + imp = TRUE + )) - expect_silent(messer(state = "DC", year = 2020, quiet = TRUE)) + expect_silent(messer( + state = 'DC', + year = 2020, + quiet = TRUE + )) - expect_silent(messer(state = "DC", year = 2020, imp = TRUE, quiet = TRUE)) + expect_silent(messer( + state = 'DC', + year = 2020, + imp = TRUE, + quiet = TRUE + )) -} -) +}) diff --git a/tests/testthat/test-powell_wiley.R b/tests/testthat/test-powell_wiley.R index 1ff4de4..97f509f 100644 --- a/tests/testthat/test-powell_wiley.R +++ b/tests/testthat/test-powell_wiley.R @@ -1,46 +1,77 @@ -context("powell_wiley") +context('powell_wiley') -######################### +# --------------------- # # powell_wiley testthat # -######################### +# --------------------- # -test_that("powell_wiley throws error with invalid arguments", { - +test_that('powell_wiley throws error with invalid arguments', { # Not a data.frame or tibble for `df` - expect_error(powell_wiley(df = c("a", "b", "c"))) + expect_error(powell_wiley(df = c('a', 'b', 'c'))) # Unavailable geography - expect_error(powell_wiley(geo = "zcta", state = "DC", year = 2020, quiet = TRUE)) + expect_error(powell_wiley( + geo = 'zcta', + state = 'DC', + year = 2020, + quiet = TRUE + )) # Unavailable year - expect_error(powell_wiley(state = "DC", year = 2005, quiet = TRUE)) + expect_error(powell_wiley( + state = 'DC', + year = 2005, + quiet = TRUE + )) - skip_if(Sys.getenv("CENSUS_API_KEY") == "") + skip_if(Sys.getenv('CENSUS_API_KEY') == '') # Incorrect state - expect_error(powell_wiley(state = "AB", year = 2020, quiet = TRUE)) + expect_error(powell_wiley( + state = 'AB', + year = 2020, + quiet = TRUE + )) # Unavailable geography for DC (only 1 'county' in DC so, alone, NDI cannot be computed) - expect_error(powell_wiley(geo = "county", state = "DC", year = 2009, quiet = TRUE)) - -} -) - -test_that("powell_wiley works", { + 
expect_error(powell_wiley( + geo = 'county', + state = 'DC', + year = 2009, + quiet = TRUE + )) - expect_message(powell_wiley(df = DCtracts2020[ , -c(3:10)])) +}) + +test_that('powell_wiley works', { + expect_message(powell_wiley(df = DCtracts2020[,-c(3:10)])) - skip_if(Sys.getenv("CENSUS_API_KEY") == "") + skip_if(Sys.getenv('CENSUS_API_KEY') == '') - expect_message(powell_wiley(state = "DC", year = 2020)) + expect_message(powell_wiley(state = 'DC', year = 2020)) - expect_message(powell_wiley(state = "DC", year = 2020, round_output = TRUE)) + expect_message(powell_wiley( + state = 'DC', + year = 2020, + round_output = TRUE + )) - expect_message(powell_wiley(state = "DC", year = 2020, imp = TRUE)) + expect_message(powell_wiley( + state = 'DC', + year = 2020, + imp = TRUE + )) - expect_silent(powell_wiley(state = "DC", year = 2020, quiet = TRUE)) + expect_silent(powell_wiley( + state = 'DC', + year = 2020, + quiet = TRUE + )) - expect_silent(powell_wiley(state = "DC", year = 2020, imp = TRUE, quiet = TRUE)) + expect_silent(powell_wiley( + state = 'DC', + year = 2020, + imp = TRUE, + quiet = TRUE + )) -} -) +}) diff --git a/tests/testthat/test-sudano.R b/tests/testthat/test-sudano.R new file mode 100644 index 0000000..c929055 --- /dev/null +++ b/tests/testthat/test-sudano.R @@ -0,0 +1,77 @@ +context('sudano') + +# --------------- # +# sudano testthat # +# --------------- # + +test_that('sudano throws error with invalid arguments', { + # Unavailable geography + expect_error(sudano( + geo_small = 'zcta', + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + )) + expect_error( + sudano( + geo_large = 'block group', + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + ) + ) + + # Unavailable year + expect_error(sudano( + state = 'DC', + year = 2005, + subgroup = 'NHoLB', + quiet = TRUE + )) + + # Unavailable subgroup + expect_error(sudano( + state = 'DC', + year = 2020, + subgroup = 'terran', + quiet = TRUE + )) + + 
skip_if(Sys.getenv('CENSUS_API_KEY') == '') + + # Incorrect state + expect_error(sudano( + state = 'AB', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + )) + +}) + +test_that('sudano works', { + skip_if(Sys.getenv('CENSUS_API_KEY') == '') + + expect_silent(sudano( + state = 'DC', + year = 2020, + subgroup = c('NHoLB', 'HoLB') + )) + + expect_silent(sudano( + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + )) + + expect_silent(sudano( + state = 'DC', + year = 2020, + subgroup = c('NHoLB', 'HoLB'), + quiet = TRUE + )) + +}) diff --git a/tests/testthat/test-white.R b/tests/testthat/test-white.R new file mode 100644 index 0000000..0e85449 --- /dev/null +++ b/tests/testthat/test-white.R @@ -0,0 +1,77 @@ +context('white') + +# -------------- # +# white testthat # +# -------------- # + +test_that('white throws error with invalid arguments', { + # Unavailable geography + expect_error(white( + geo_small = 'zcta', + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + )) + expect_error( + white( + geo_large = 'block group', + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + ) + ) + + # Unavailable year + expect_error(white( + state = 'DC', + year = 2005, + subgroup = 'NHoLB', + quiet = TRUE + )) + + # Unavailable subgroup + expect_error(white( + state = 'DC', + year = 2020, + subgroup = 'terran', + quiet = TRUE + )) + + skip_if(Sys.getenv('CENSUS_API_KEY') == '') + + # Incorrect state + expect_error(white( + state = 'AB', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + )) + +}) + +test_that('white works', { + skip_if(Sys.getenv('CENSUS_API_KEY') == '') + + expect_silent(white( + state = 'DC', + year = 2020, + subgroup = c('NHoLB', 'HoLB') + )) + + expect_silent(white( + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + )) + + expect_silent(white( + state = 'DC', + year = 2020, + subgroup = c('NHoLB', 'HoLB'), + quiet = TRUE + )) + +}) diff --git a/tests/testthat/test-white_blau.R 
b/tests/testthat/test-white_blau.R new file mode 100644 index 0000000..2beb602 --- /dev/null +++ b/tests/testthat/test-white_blau.R @@ -0,0 +1,104 @@ +context('white_blau') + +# --------------- # +# white_blau testthat # +# --------------- # + +test_that('white_blau throws error with invalid arguments', { + # Unavailable geography + expect_error( + white_blau( + geo_small = 'zcta', + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + subgroup_ref = 'NHoLW', + quiet = TRUE + ) + ) + expect_error( + white_blau( + geo_large = 'block group', + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + subgroup_ref = 'NHoLW', + quiet = TRUE + ) + ) + + # Unavailable year + expect_error( + white_blau( + state = 'DC', + year = 2005, + subgroup = 'NHoLB', + subgroup_ref = 'NHoLW', + quiet = TRUE + ) + ) + + # Unavailable subgroup + expect_error( + white_blau( + state = 'DC', + year = 2020, + subgroup = 'terran', + subgroup_ref = 'NHoLW', + quiet = TRUE + ) + ) + expect_error( + white_blau( + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + subgroup_ref = 'terran', + quiet = TRUE + ) + ) + + skip_if(Sys.getenv('CENSUS_API_KEY') == '') + + # Incorrect state + expect_error( + white_blau( + state = 'AB', + year = 2020, + subgroup = 'NHoLB', + subgroup_ref = 'NHoLW', + quiet = TRUE + ) + ) + +}) + +test_that('white_blau works', { + skip_if(Sys.getenv('CENSUS_API_KEY') == '') + + expect_silent(white_blau( + state = 'DC', + year = 2020, + subgroup = c('NHoLB', 'HoLB'), + subgroup_ref = c('NHoLW', 'HoLW') + )) + + expect_silent( + white_blau( + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + subgroup_ref = 'NHoLW', + quiet = TRUE + ) + ) + + expect_silent(white_blau( + state = 'DC', + year = 2020, + subgroup = c('NHoLB', 'HoLB'), + subgroup_ref = c('NHoLW', 'HoLW'), + quiet = TRUE + )) + +}) diff --git a/vignettes/vignette.Rmd b/vignettes/vignette.Rmd index 551938b..463adf0 100644 --- a/vignettes/vignette.Rmd +++ b/vignettes/vignette.Rmd @@ -1,7 +1,7 @@ --- -title: "ndi: 
Neighborhood Deprivation Indices" +title: 'ndi: Neighborhood Deprivation Indices' author: 'Ian D. Buller (GitHub: @idblr)' -date: "`r Sys.Date()`" +date: '`r Sys.Date()`' output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{ndi: Neighborhood Deprivation Indices} @@ -11,31 +11,31 @@ vignette: > ```{r setup, include = FALSE} library(knitr) -knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, cache = FALSE, fig.show = "hold") +knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, cache = FALSE, fig.show = 'hold') ``` Start with the necessary packages for the vignette. ```{r packages, results = 'hide'} -loadedPackages <- c("dplyr", "ggplot2", "ndi", "tidycensus", "tigris") +loadedPackages <- c('dplyr', 'ggplot2', 'ndi', 'sf', 'tidycensus', 'tigris') invisible(lapply(loadedPackages, library, character.only = TRUE)) options(tigris_use_cache = TRUE) ``` -Set your U.S. Census Bureau access key. Follow [this link](http://api.census.gov/data/key_signup.html) to obtain one. Specify your access key in the `messer()` or `powell_wiley()` functions using the `key` argument of the `get_acs()` function from the `tidycensus` package called within each or by using the `census_api_key()` function from the `tidycensus` package before running the `messer()` or `powell_wiley()` functions (see an example of the latter below). +Set your U.S. Census Bureau access key. Follow [this link](http://api.census.gov/data/key_signup.html) to obtain one. Specify your access key in the `messer()` or `powell_wiley()` functions using the `key` argument of the `get_acs()` function from the [*tidycensus*](https://CRAN.R-project.org/package=tidycensus) package called within each or by using the `census_api_key()` function from the [*tidycensus*](https://CRAN.R-project.org/package=tidycensus) package before running the `messer()` or `powell_wiley()` functions (see an example of the latter below). 
```{r access_key_private, echo = FALSE} -source("../dev/private_key.R") -tidycensus::census_api_key(private_key) +source(file.path('..', 'dev', 'private_key.R')) +census_api_key(private_key) ``` ```{r access_key_public, eval = FALSE} -tidycensus::census_api_key("...") # INSERT YOUR OWN KEY FROM U.S. CENSUS API +census_api_key('...') # INSERT YOUR OWN KEY FROM U.S. CENSUS API ``` -### Compute NDI (Messer) +### Compute *NDI* (Messer) -Compute the NDI (Messer) values (2006-2010 5-year ACS) for Georgia, U.S.A., census tracts. This metric is based on [Messer et al. (2006)](https://doi.org/10.1007/s11524-006-9094-x) with the following socio-economic status (SES) variables: +Compute the *NDI* (Messer) values (2006-2010 5-year ACS) for Georgia, U.S.A., census tracts. This metric is based on [Messer et al. (2006)](https://doi.org/10.1007/s11524-006-9094-x) with the following socio-economic status (SES) variables: | Characteristic | SES dimension | ACS table source | Description | | -------------- | ------------- | ---------------- | ----------- | @@ -49,135 +49,163 @@ Compute the NDI (Messer) values (2006-2010 5-year ACS) for Georgia, U.S.A., cens | EMP | Employment | B23001 (2010 only); B23025 (2011 onward) | Percent unemployed | ```{r messer, results = 'hide'} -messer2010GA <- ndi::messer(state = "GA", year = 2010, round_output = TRUE) +messer2010GA <- messer(state = 'GA', year = 2010, round_output = TRUE) ``` -One output from the `messer()` function is a tibble containing the identification, geographic name, NDI (Messer) values, and raw census characteristics for each tract. +One output from the `messer()` function is a tibble containing the identification, geographic name, *NDI* (Messer) values, and raw census characteristics for each tract. ```{r messer_out1} messer2010GA$ndi ``` -A second output from the `messer()` function is the results from the principal component analysis used to compute the NDI (Messer) values. 
+A second output from the `messer()` function is the results from the principal component analysis used to compute the *NDI* (Messer) values. ```{r messer_out2} messer2010GA$pca ``` -A third output from the `messer()` function is a tibble containing a breakdown of the missingness of the census characteristics used to compute the NDI (Messer) values. +A third output from the `messer()` function is a tibble containing a breakdown of the missingness of the census characteristics used to compute the *NDI* (Messer) values. ```{r messer_out3} messer2010GA$missing ``` -We can visualize the NDI (Messer) values geographically by linking them to spatial information from the `tigris` package and plotting with the `ggplot2` package suite. +We can visualize the *NDI* (Messer) values geographically by linking them to spatial information from the [*tigris*](https://CRAN.R-project.org/package=tigris) package and plotting with the [*ggplot2*](https://CRAN.R-project.org/package=ggplot2) package suite. 
```{r messer_prep, results = 'hide'} -# Obtain the 2010 counties from the "tigris" package -county2010GA <- tigris::counties(state = "GA", year = 2010, cb = TRUE) +# Obtain the 2010 counties from the 'tigris' package +county2010GA <- counties(state = 'GA', year = 2010, cb = TRUE) # Remove first 9 characters from GEOID for compatibility with tigris information county2010GA$GEOID <- substring(county2010GA$GEO_ID, 10) -# Obtain the 2010 census tracts from the "tigris" package -tract2010GA <- tigris::tracts(state = "GA", year = 2010, cb = TRUE) +# Obtain the 2010 census tracts from the 'tigris' package +tract2010GA <- tracts(state = 'GA', year = 2010, cb = TRUE) # Remove first 9 characters from GEOID for compatibility with tigris information tract2010GA$GEOID <- substring(tract2010GA$GEO_ID, 10) # Join the NDI (Messer) values to the census tract geometry -GA2010messer <- dplyr::left_join(tract2010GA, messer2010GA$ndi, by = "GEOID") +GA2010messer <- tract2010GA %>% + left_join(messer2010GA$ndi, by = 'GEOID') ``` ```{r messer_plot, fig.height = 7, fig.width = 7} # Visualize the NDI (Messer) values (2006-2010 5-year ACS) for Georgia, U.S.A., census tracts ## Continuous Index -ggplot2::ggplot() + - ggplot2::geom_sf(data = GA2010messer, - ggplot2::aes(fill = NDI), - size = 0.05, - color = "transparent") + - ggplot2::geom_sf(data = county2010GA, - fill = "transparent", - color = "white", - size = 0.2) + - ggplot2::theme_minimal() + - ggplot2::scale_fill_viridis_c() + - ggplot2::labs(fill = "Index (Continuous)", - caption = "Source: U.S. 
Census ACS 2006-2010 estimates") + - ggplot2::ggtitle("Neighborhood Deprivation Index (Messer)", - subtitle = "GA census tracts as the referent") +ggplot() + + geom_sf( + data = GA2010messer, + aes(fill = NDI), + size = 0.05, + color = 'transparent' + ) + + geom_sf( + data = county2010GA, + fill = 'transparent', + color = 'white', + size = 0.2 + ) + + theme_minimal() + + scale_fill_viridis_c() + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2006-2010 estimates') + + ggtitle( + 'Neighborhood Deprivation Index (Messer)', + subtitle = 'GA census tracts as the referent' + ) ## Categorical Index -### Rename "9-NDI not avail" level as NA for plotting -GA2010messer$NDIQuartNA <- factor(replace(as.character(GA2010messer$NDIQuart), - GA2010messer$NDIQuart == "9-NDI not avail", NA), - c(levels(GA2010messer$NDIQuart)[-5], NA)) - -ggplot2::ggplot() + - ggplot2::geom_sf(data = GA2010messer, - ggplot2::aes(fill = NDIQuartNA), - size = 0.05, - color = "transparent") + - ggplot2::geom_sf(data = county2010GA, - fill = "transparent", - color = "white", - size = 0.2) + - ggplot2::theme_minimal() + - ggplot2::scale_fill_viridis_d(guide = ggplot2::guide_legend(reverse = TRUE), - na.value = "grey80") + - ggplot2::labs(fill = "Index (Categorical)", - caption = "Source: U.S. Census ACS 2006-2010 estimates") + - ggplot2::ggtitle("Neighborhood Deprivation Index (Messer) Quartiles", - subtitle = "GA census tracts as the referent") -``` - -The results above are at the tract level. The NDI (Messer) values can also be calculated at the county level. 
+### Rename '9-NDI not avail' level as NA for plotting +GA2010messer$NDIQuartNA <- + factor( + replace( + as.character(GA2010messer$NDIQuart), + GA2010messer$NDIQuart == '9-NDI not avail', + NA + ), + c(levels(GA2010messer$NDIQuart)[-5], NA) + ) + +ggplot() + + geom_sf( + data = GA2010messer, + aes(fill = NDIQuartNA), + size = 0.05, + color = 'transparent' + ) + + geom_sf( + data = county2010GA, + fill = 'transparent', + color = 'white', + size = 0.2 + ) + + theme_minimal() + + scale_fill_viridis_d(guide = guide_legend(reverse = TRUE), na.value = 'grey80') + + labs(fill = 'Index (Categorical)', caption = 'Source: U.S. Census ACS 2006-2010 estimates') + + ggtitle( + 'Neighborhood Deprivation Index (Messer) Quartiles', + subtitle = 'GA census tracts as the referent' + ) +``` + +The results above are at the tract level. The *NDI* (Messer) values can also be calculated at the county level. ```{r messer_county_prep, results = 'hide'} -messer2010GA_county <- ndi::messer(geo = "county", state = "GA", year = 2010) +messer2010GA_county <- messer(geo = 'county', state = 'GA', year = 2010) # Join the NDI (Messer) values to the county geometry -GA2010messer_county <- dplyr::left_join(county2010GA, messer2010GA_county$ndi, by = "GEOID") +GA2010messer_county <- county2010GA %>% + left_join(messer2010GA_county$ndi, by = 'GEOID') ``` ```{r messer_county_plot, fig.height = 7, fig.width = 7} # Visualize the NDI (Messer) values (2006-2010 5-year ACS) for Georgia, U.S.A., counties ## Continuous Index -ggplot2::ggplot() + - ggplot2::geom_sf(data = GA2010messer_county, - ggplot2::aes(fill = NDI), - size = 0.20, - color = "white") + - ggplot2::theme_minimal() + - ggplot2::scale_fill_viridis_c() + - ggplot2::labs(fill = "Index (Continuous)", - caption = "Source: U.S. 
Census ACS 2006-2010 estimates") + - ggplot2::ggtitle("Neighborhood Deprivation Index (Messer)", - subtitle = "GA counties as the referent") +ggplot() + + geom_sf( + data = GA2010messer_county, + aes(fill = NDI), + size = 0.20, + color = 'white' + ) + + theme_minimal() + + scale_fill_viridis_c() + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2006-2010 estimates') + + ggtitle( + 'Neighborhood Deprivation Index (Messer)', + subtitle = 'GA counties as the referent' + ) ## Categorical Index -### Rename "9-NDI not avail" level as NA for plotting -GA2010messer_county$NDIQuartNA <- factor(replace(as.character(GA2010messer_county$NDIQuart), - GA2010messer_county$NDIQuart == "9-NDI not avail", NA), - c(levels(GA2010messer_county$NDIQuart)[-5], NA)) - -ggplot2::ggplot() + - ggplot2::geom_sf(data = GA2010messer_county, - ggplot2::aes(fill = NDIQuartNA), - size = 0.20, - color = "white") + - ggplot2::theme_minimal() + - ggplot2::scale_fill_viridis_d(guide = ggplot2::guide_legend(reverse = TRUE), - na.value = "grey80") + - ggplot2::labs(fill = "Index (Categorical)", - caption = "Source: U.S. Census ACS 2006-2010 estimates") + - ggplot2::ggtitle("Neighborhood Deprivation Index (Messer) Quartiles", - subtitle = "GA counties as the referent") +### Rename '9-NDI not avail' level as NA for plotting +GA2010messer_county$NDIQuartNA <- + factor( + replace( + as.character(GA2010messer_county$NDIQuart), + GA2010messer_county$NDIQuart == '9-NDI not avail', + NA + ), + c(levels(GA2010messer_county$NDIQuart)[-5], NA) + ) + +ggplot() + + geom_sf( + data = GA2010messer_county, + aes(fill = NDIQuartNA), + size = 0.20, + color = 'white' + ) + + theme_minimal() + + scale_fill_viridis_d(guide = guide_legend(reverse = TRUE), na.value = 'grey80') + + labs(fill = 'Index (Categorical)', caption = 'Source: U.S. 
Census ACS 2006-2010 estimates') + + ggtitle( + 'Neighborhood Deprivation Index (Messer) Quartiles', + subtitle = 'GA counties as the referent' + ) ``` -### Compute NDI (Powell-Wiley) +### Compute *NDI* (Powell-Wiley) -Compute the NDI (Powell-Wiley) values (2016-2020 5-year ACS) for Maryland, Virginia, Washington, D.C., and West Virginia, U.S.A., census tracts. This metric is based on [Andrews et al. (2020)](https://doi.org/10.1080/17445647.2020.1750066) and [Slotman et al. (2022)](https://doi.org/10.1016/j.dib.2022.108002) with socio-economic status (SES) variables chosen by [Roux and Mair (2010)](https://doi.org/10.1111/j.1749-6632.2009.05333.x): +Compute the *NDI* (Powell-Wiley) values (2016-2020 5-year ACS) for Maryland, Virginia, Washington, D.C., and West Virginia, U.S.A., census tracts. This metric is based on [Andrews et al. (2020)](https://doi.org/10.1080/17445647.2020.1750066) and [Slotman et al. (2022)](https://doi.org/10.1016/j.dib.2022.108002) with socio-economic status (SES) variables chosen by [Roux and Mair (2010)](https://doi.org/10.1111/j.1749-6632.2009.05333.x): | Characteristic | SES dimension | ACS table source | Description | | -------------- | ------------- | ---------------- | ----------- | @@ -195,25 +223,29 @@ Compute the NDI (Powell-Wiley) values (2016-2020 5-year ACS) for Maryland, Virgi | PctFamBelowPov | Wealth and income | S1702 | Percent of families with incomes below the poverty level | | PctUnempl | Occupation | S2301 | Percent unemployed | -More information about the [codebook](https://gis.cancer.gov/research/NeighDeprvIndex_Methods.pdf) and [computation](https://gis.cancer.gov/research/NeighDeprvIndex_Methods.pdf) of the NDI (Powell-Wiley) can be found on a [GIS Portal for Cancer Research](https://gis.cancer.gov/research/files.html#soc-dep) website. 
+More information about the [codebook](https://gis.cancer.gov/research/NeighDeprvIndex_Methods.pdf) and [computation](https://gis.cancer.gov/research/NeighDeprvIndex_Methods.pdf) of the *NDI* (Powell-Wiley) can be found on a [GIS Portal for Cancer Research](https://gis.cancer.gov/research/files.html#soc-dep) website. ```{r powell_wiley, results = 'hide'} -powell_wiley2020DMVW <- ndi::powell_wiley(state = c("DC", "MD", "VA", "WV"), year = 2020, round_output = TRUE) +powell_wiley2020DMVW <- powell_wiley( + state = c('DC', 'MD', 'VA', 'WV'), + year = 2020, + round_output = TRUE +) ``` -One output from the `powell_wiley()` function is a tibble containing the identification, geographic name, NDI (Powell-Wiley) values, and raw census characteristics for each tract. +One output from the `powell_wiley()` function is a tibble containing the identification, geographic name, *NDI* (Powell-Wiley) values, and raw census characteristics for each tract. ```{r powell_wiley_out1} powell_wiley2020DMVW$ndi ``` -A second output from the `powell_wiley()` function is the results from the principal component analysis used to compute the NDI (Powell-Wiley) values. +A second output from the `powell_wiley()` function is the results from the principal component analysis used to compute the *NDI* (Powell-Wiley) values. ```{r powell_wiley_out2} powell_wiley2020DMVW$pca ``` -A third output from the `powell_wiley()` function is a tibble containing a breakdown of the missingness of the census characteristics used to compute the NDI (Powell-Wiley) values. +A third output from the `powell_wiley()` function is a tibble containing a breakdown of the missingness of the census characteristics used to compute the *NDI* (Powell-Wiley) values. 
```{r powell_wiley_out3} powell_wiley2020DMVW$missing @@ -225,243 +257,301 @@ A fourth output from the `powell_wiley()` function is a character string or nume powell_wiley2020DMVW$cronbach ``` -We can visualize the NDI (Powell-Wiley) values geographically by linking them to spatial information from the `tigris` package and plotting with the `ggplot2` package suite. +We can visualize the *NDI* (Powell-Wiley) values geographically by linking them to spatial information from the [*tigris*](https://CRAN.R-project.org/package=tigris) package and plotting with the [*ggplot2*](https://CRAN.R-project.org/package=ggplot2) package suite. ```{r powell_wiley_prep, results = 'hide'} -# Obtain the 2020 counties from the "tigris" package -county2020 <- tigris::counties(cb = TRUE) -county2020DMVW <- county2020[county2020$STUSPS %in% c("DC", "MD", "VA", "WV"), ] - -# Obtain the 2020 census tracts from the "tigris" package -tract2020D <- tigris::tracts(state = "DC", year = 2020, cb = TRUE) -tract2020M <- tigris::tracts(state = "MD", year = 2020, cb = TRUE) -tract2020V <- tigris::tracts(state = "VA", year = 2020, cb = TRUE) -tract2020W <- tigris::tracts(state = "WV", year = 2020, cb = TRUE) +# Obtain the 2020 counties from the 'tigris' package +county2020 <- counties(cb = TRUE) +county2020DMVW <- county2020[county2020$STUSPS %in% c('DC', 'MD', 'VA', 'WV'), ] + +# Obtain the 2020 census tracts from the 'tigris' package +tract2020D <- tracts(state = 'DC', year = 2020, cb = TRUE) +tract2020M <- tracts(state = 'MD', year = 2020, cb = TRUE) +tract2020V <- tracts(state = 'VA', year = 2020, cb = TRUE) +tract2020W <- tracts(state = 'WV', year = 2020, cb = TRUE) tracts2020DMVW <- rbind(tract2020D, tract2020M, tract2020V, tract2020W) # Join the NDI (Powell-Wiley) values to the census tract geometry -DMVW2020pw <- dplyr::left_join(tracts2020DMVW, powell_wiley2020DMVW$ndi, by = "GEOID") +DMVW2020pw <- tracts2020DMVW %>% + left_join(powell_wiley2020DMVW$ndi, by = 
'GEOID') ``` ```{r powell_wiley_plot, fig.height = 4, fig.width = 7} # Visualize the NDI (Powell-Wiley) values (2016-2020 5-year ACS) ## Maryland, Virginia, Washington, D.C., and West Virginia, U.S.A., census tracts ## Continuous Index -ggplot2::ggplot() + - ggplot2::geom_sf(data = DMVW2020pw, - ggplot2::aes(fill = NDI), - color = NA) + - ggplot2::geom_sf(data = county2020DMVW, - fill = "transparent", - color = "white") + - ggplot2::theme_minimal() + - ggplot2::scale_fill_viridis_c(na.value = "grey80") + - ggplot2::labs(fill = "Index (Continuous)", - caption = "Source: U.S. Census ACS 2016-2020 estimates")+ - ggplot2::ggtitle("Neighborhood Deprivation Index (Powell-Wiley)", - subtitle = "DC, MD, VA, and WV tracts as the referent") +ggplot() + + geom_sf( + data = DMVW2020pw, + aes(fill = NDI), + color = NA + ) + + geom_sf( + data = county2020DMVW, + fill = 'transparent', + color = 'white' + ) + + theme_minimal() + + scale_fill_viridis_c(na.value = 'grey80') + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2016-2020 estimates') + + ggtitle( + 'Neighborhood Deprivation Index (Powell-Wiley)', + subtitle = 'DC, MD, VA, and WV tracts as the referent' + ) ## Categorical Index (Population-weighted quintiles) -### Rename "9-NDI not avail" level as NA for plotting -DMVW2020pw$NDIQuintNA <- factor(replace(as.character(DMVW2020pw$NDIQuint), - DMVW2020pw$NDIQuint == "9-NDI not avail", NA), - c(levels(DMVW2020pw$NDIQuint)[-6], NA)) - -ggplot2::ggplot() + - ggplot2::geom_sf(data = DMVW2020pw, - ggplot2::aes(fill = NDIQuintNA), - color = NA) + - ggplot2::geom_sf(data = county2020DMVW, - fill = "transparent", - color = "white") + - ggplot2::theme_minimal() + - ggplot2::scale_fill_viridis_d(guide = ggplot2::guide_legend(reverse = TRUE), - na.value = "grey80") + - ggplot2::labs(fill = "Index (Categorical)", - caption = "Source: U.S. 
Census ACS 2016-2020 estimates")+ - ggplot2::ggtitle("Neighborhood Deprivation Index (Powell-Wiley) Population-weighted Quintiles", - subtitle = "DC, MD, VA, and WV tracts as the referent") -``` - -Like the NDI (Messer), we also compute county-level NDI (Powell-Wiley). +### Rename '9-NDI not avail' level as NA for plotting +DMVW2020pw$NDIQuintNA <- + factor(replace( + as.character(DMVW2020pw$NDIQuint), + DMVW2020pw$NDIQuint == '9-NDI not avail', + NA + ), + c(levels(DMVW2020pw$NDIQuint)[-6], NA)) + +ggplot() + + geom_sf(data = DMVW2020pw, aes(fill = NDIQuintNA), color = NA) + + geom_sf(data = county2020DMVW, fill = 'transparent', color = 'white') + + theme_minimal() + + scale_fill_viridis_d(guide = guide_legend(reverse = TRUE), na.value = 'grey80') + + labs(fill = 'Index (Categorical)', caption = 'Source: U.S. Census ACS 2016-2020 estimates') + + ggtitle( + 'Neighborhood Deprivation Index (Powell-Wiley) Population-weighted Quintiles', + subtitle = 'DC, MD, VA, and WV tracts as the referent' + ) +``` + +Like the *NDI* (Messer), we also compute county-level *NDI* (Powell-Wiley). 
```{r powell_wiley_county_prep, results = 'hide'} -# Obtain the 2020 counties from the "tigris" package -county2020DMVW <- tigris::counties(state = c("DC", "MD", "VA", "WV"), year = 2020, cb = TRUE) +# Obtain the 2020 counties from the 'tigris' package +county2020DMVW <- counties(state = c('DC', 'MD', 'VA', 'WV'), year = 2020, cb = TRUE) # NDI (Powell-Wiley) at the county level (2016-2020) -powell_wiley2020DMVW_county <- ndi::powell_wiley(geo = "county", - state = c("DC", "MD", "VA", "WV"), - year = 2020) +powell_wiley2020DMVW_county <- powell_wiley( + geo = 'county', + state = c('DC', 'MD', 'VA', 'WV'), + year = 2020 +) # Join the NDI (Powell-Wiley) values to the county geometry -DMVW2020pw_county <- dplyr::left_join(county2020DMVW, powell_wiley2020DMVW_county$ndi, by = "GEOID") +DMVW2020pw_county <- county2020DMVW %>% + left_join(powell_wiley2020DMVW_county$ndi, by = 'GEOID') ``` ```{r powell_wiley_county_plot, fig.height = 4, fig.width = 7} # Visualize the NDI (Powell-Wiley) values (2016-2020 5-year ACS) ## Maryland, Virginia, Washington, D.C., and West Virginia, U.S.A., counties ## Continuous Index -ggplot2::ggplot() + - ggplot2::geom_sf(data = DMVW2020pw_county, - ggplot2::aes(fill = NDI), - size = 0.20, - color = "white") + - ggplot2::theme_minimal() + - ggplot2::scale_fill_viridis_c() + - ggplot2::labs(fill = "Index (Continuous)", - caption = "Source: U.S. Census ACS 2016-2020 estimates") + - ggplot2::ggtitle("Neighborhood Deprivation Index (Powell-Wiley)", - subtitle = "DC, MD, VA, and WV counties as the referent") +ggplot() + + geom_sf( + data = DMVW2020pw_county, + aes(fill = NDI), + size = 0.20, + color = 'white' + ) + + theme_minimal() + + scale_fill_viridis_c() + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. 
Census ACS 2016-2020 estimates') + + ggtitle( + 'Neighborhood Deprivation Index (Powell-Wiley)', + subtitle = 'DC, MD, VA, and WV counties as the referent' + ) ## Categorical Index -### Rename "9-NDI not avail" level as NA for plotting -DMVW2020pw_county$NDIQuintNA <- factor(replace(as.character(DMVW2020pw_county$NDIQuint), - DMVW2020pw_county$NDIQuint == "9-NDI not avail", NA), - c(levels(DMVW2020pw_county$NDIQuint)[-6], NA)) - -ggplot2::ggplot() + - ggplot2::geom_sf(data = DMVW2020pw_county, - ggplot2::aes(fill = NDIQuint), - size = 0.20, - color = "white") + - ggplot2::theme_minimal() + - ggplot2::scale_fill_viridis_d(guide = ggplot2::guide_legend(reverse = TRUE), - na.value = "grey80") + - ggplot2::labs(fill = "Index (Categorical)", - caption = "Source: U.S. Census ACS 2016-2020 estimates") + - ggplot2::ggtitle("Neighborhood Deprivation Index (Powell-Wiley) Population-weighted Quintiles", - subtitle = "DC, MD, VA, and WV counties as the referent") +### Rename '9-NDI not avail' level as NA for plotting +DMVW2020pw_county$NDIQuintNA <- + factor( + replace( + as.character(DMVW2020pw_county$NDIQuint), + DMVW2020pw_county$NDIQuint == '9-NDI not avail', + NA + ), + c(levels(DMVW2020pw_county$NDIQuint)[-6], NA) + ) + +ggplot() + + geom_sf( + data = DMVW2020pw_county, + aes(fill = NDIQuint), + size = 0.20, + color = 'white' + ) + + theme_minimal() + + scale_fill_viridis_d(guide = guide_legend(reverse = TRUE), na.value = 'grey80') + + labs(fill = 'Index (Categorical)', caption = 'Source: U.S. 
Census ACS 2016-2020 estimates') + + ggtitle( + 'Neighborhood Deprivation Index (Powell-Wiley) Population-weighted Quintiles', + subtitle = 'DC, MD, VA, and WV counties as the referent' + ) ``` ### Advanced Features #### Imputing missing census variables -In the `messer()` and `powell_wiley()` functions, missing census characteristics can be imputed using the `missing` and `impute` arguments of the `pca()` function in the `psych` package called within the `messer()` and `powell_wiley()` functions. Impute values using the logical `imp` argument (currently only calls `impute = "median"` by default, which assigns the median values of each missing census variable for a geography). +In the `messer()` and `powell_wiley()` functions, missing census characteristics can be imputed using the `missing` and `impute` arguments of the `pca()` function in the [*psych*](https://CRAN.R-project.org/package=psych) package called within the `messer()` and `powell_wiley()` functions. Impute values using the logical `imp` argument (currently only calls `impute = 'median'` by default, which assigns the median values of each missing census variable for a geography). 
```{r powell_wiley_imp, results = 'hide'} -powell_wiley2020DC <- ndi::powell_wiley(state = "DC", year = 2020) # without imputation -powell_wiley2020DCi <- ndi::powell_wiley(state = "DC", year = 2020, imp = TRUE) # with imputation +powell_wiley2020DC <- powell_wiley(state = 'DC', year = 2020) # without imputation +powell_wiley2020DCi <- powell_wiley(state = 'DC', year = 2020, imp = TRUE) # with imputation table(is.na(powell_wiley2020DC$ndi$NDI)) # n=13 tracts without NDI (Powell-Wiley) values table(is.na(powell_wiley2020DCi$ndi$NDI)) # n=0 tracts without NDI (Powell-Wiley) values -# Obtain the 2020 census tracts from the "tigris" package -tract2020DC <- tigris::tracts(state = "DC", year = 2020, cb = TRUE) +# Obtain the 2020 census tracts from the 'tigris' package +tract2020DC <- tracts(state = 'DC', year = 2020, cb = TRUE) # Join the NDI (Powell-Wiley) values to the census tract geometry -DC2020pw <- dplyr::left_join(tract2020DC, powell_wiley2020DC$ndi, by = "GEOID") -DC2020pw <- dplyr::left_join(DC2020pw, powell_wiley2020DCi$ndi, by = "GEOID", suffix = c("_nonimp", "_imp")) +DC2020pw <- tract2020DC %>% + left_join(powell_wiley2020DC$ndi, by = 'GEOID') +DC2020pw <- DC2020pw %>% + left_join(powell_wiley2020DCi$ndi, by = 'GEOID', suffix = c('_nonimp', '_imp')) ``` ```{r powell_wiley_imp_plot, fig.height = 7, fig.width = 7} -# Visualize the NDI (Powell-Wiley) values (2016-2020 5-year ACS) for Washington, D.C., census tracts +# Visualize the NDI (Powell-Wiley) values (2016-2020 5-year ACS) for +## Washington, D.C., census tracts ## Continuous Index -ggplot2::ggplot() + - ggplot2::geom_sf(data = DC2020pw, - ggplot2::aes(fill = NDI_nonimp), - size = 0.2, - color = "white") + - ggplot2::theme_minimal() + - ggplot2::scale_fill_viridis_c() + - ggplot2::labs(fill = "Index (Continuous)", - caption = "Source: U.S. 
Census ACS 2016-2020 estimates") + - ggplot2::ggtitle("Neighborhood Deprivation Index (Powell-Wiley), Non-Imputed", - subtitle = "DC census tracts as the referent") - -ggplot2::ggplot() + - ggplot2::geom_sf(data = DC2020pw, - ggplot2::aes(fill = NDI_imp), - size = 0.2, - color = "white") + - ggplot2::theme_minimal() + - ggplot2::scale_fill_viridis_c() + - ggplot2::labs(fill = "Index (Continuous)", - caption = "Source: U.S. Census ACS 2016-2020 estimates") + - ggplot2::ggtitle("Neighborhood Deprivation Index (Powell-Wiley), Imputed", - subtitle = "DC census tracts as the referent") +ggplot() + + geom_sf( + data = DC2020pw, + aes(fill = NDI_nonimp), + size = 0.2, + color = 'white' + ) + + theme_minimal() + + scale_fill_viridis_c() + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2016-2020 estimates') + + ggtitle( + 'Neighborhood Deprivation Index (Powell-Wiley), Non-Imputed', + subtitle = 'DC census tracts as the referent' + ) + +ggplot() + + geom_sf( + data = DC2020pw, + aes(fill = NDI_imp), + size = 0.2, + color = 'white' + ) + + theme_minimal() + + scale_fill_viridis_c() + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2016-2020 estimates') + + ggtitle( + 'Neighborhood Deprivation Index (Powell-Wiley), Imputed', + subtitle = 'DC census tracts as the referent' + ) ## Categorical Index -### Rename "9-NDI not avail" level as NA for plotting -DC2020pw$NDIQuintNA_nonimp <- factor(replace(as.character(DC2020pw$NDIQuint_nonimp), - DC2020pw$NDIQuint_nonimp == "9-NDI not avail", NA), - c(levels(DC2020pw$NDIQuint_nonimp)[-6], NA)) - -ggplot2::ggplot() + - ggplot2::geom_sf(data = DC2020pw, - ggplot2::aes(fill = NDIQuintNA_nonimp), - size = 0.2, - color = "white") + - ggplot2::theme_minimal() + - ggplot2::scale_fill_viridis_d(guide = ggplot2::guide_legend(reverse = TRUE), - na.value = "grey80") + - ggplot2::labs(fill = "Index (Categorical)", - caption = "Source: U.S. 
Census ACS 2016-2020 estimates") + - ggplot2::ggtitle("Neighborhood Deprivation Index (Powell-Wiley) Quintiles, Non-Imputed", - subtitle = "DC census tracts as the referent") - -### Rename "9-NDI not avail" level as NA for plotting -DC2020pw$NDIQuintNA_imp <- factor(replace(as.character(DC2020pw$NDIQuint_imp), - DC2020pw$NDIQuint_imp == "9-NDI not avail", NA), - c(levels(DC2020pw$NDIQuint_imp)[-6], NA)) - -ggplot2::ggplot() + - ggplot2::geom_sf(data = DC2020pw, - ggplot2::aes(fill = NDIQuintNA_imp), - size = 0.2, - color = "white") + - ggplot2::theme_minimal() + - ggplot2::scale_fill_viridis_d(guide = ggplot2::guide_legend(reverse = TRUE), - na.value = "grey80") + - ggplot2::labs(fill = "Index (Categorical)", - caption = "Source: U.S. Census ACS 2016-2020 estimates") + - ggplot2::ggtitle("Neighborhood Deprivation Index (Powell-Wiley) Quintiles, Imputed", - subtitle = "DC census tracts as the referent") +### Rename '9-NDI not avail' level as NA for plotting +DC2020pw$NDIQuintNA_nonimp <- + factor( + replace( + as.character(DC2020pw$NDIQuint_nonimp), + DC2020pw$NDIQuint_nonimp == '9-NDI not avail', + NA + ), + c(levels(DC2020pw$NDIQuint_nonimp)[-6], NA) + ) + +ggplot() + + geom_sf( + data = DC2020pw, + aes(fill = NDIQuintNA_nonimp), + size = 0.2, + color = 'white' + ) + + theme_minimal() + + scale_fill_viridis_d(guide = guide_legend(reverse = TRUE), na.value = 'grey80') + + labs(fill = 'Index (Categorical)', caption = 'Source: U.S. 
Census ACS 2016-2020 estimates') + + ggtitle( + 'Neighborhood Deprivation Index (Powell-Wiley) Quintiles, Non-Imputed', + subtitle = 'DC census tracts as the referent' + ) + +### Rename '9-NDI not avail' level as NA for plotting +DC2020pw$NDIQuintNA_imp <- + factor( + replace( + as.character(DC2020pw$NDIQuint_imp), + DC2020pw$NDIQuint_imp == '9-NDI not avail', + NA + ), + c(levels(DC2020pw$NDIQuint_imp)[-6], NA) + ) + +ggplot() + + geom_sf( + data = DC2020pw, + aes(fill = NDIQuintNA_imp), + size = 0.2, + color = 'white' + ) + + theme_minimal() + + scale_fill_viridis_d(guide = guide_legend(reverse = TRUE), na.value = 'grey80') + + labs(fill = 'Index (Categorical)', caption = 'Source: U.S. Census ACS 2016-2020 estimates') + + ggtitle( + 'Neighborhood Deprivation Index (Powell-Wiley) Quintiles, Imputed', + subtitle = 'DC census tracts as the referent' + ) ``` #### Assign the referent (U.S.-Standardized Metric) -To conduct a contiguous US-standardized index, compute an NDI for all states as in the example below that replicates the nationally standardized NDI (Powell-Wiley) values (2013-2017 ACS-5) found in [Slotman et al. (2022)](https://doi.org/10.1016/j.dib.2022.108002) and available from a [GIS Portal for Cancer Research](https://gis.cancer.gov/research/files.html#soc-dep) website. To replicate the nationally standardized NDI (Powell-Wiley) values (2006-2010 ACS-5) found in [Andrews et al. (2020)](https://doi.org/10.1080/17445647.2020.1750066) change the `year` argument to `2010` (i.e., `year = 2010`). +To conduct a contiguous US-standardized index, compute an *NDI* for all states as in the example below that replicates the nationally standardized *NDI* (Powell-Wiley) values (2013-2017 ACS-5) found in [Slotman et al. (2022)](https://doi.org/10.1016/j.dib.2022.108002) and available from a [GIS Portal for Cancer Research](https://gis.cancer.gov/research/files.html#soc-dep) website. 
To replicate the nationally standardized *NDI* (Powell-Wiley) values (2006-2010 ACS-5) found in [Andrews et al. (2020)](https://doi.org/10.1080/17445647.2020.1750066) change the `year` argument to `2010` (i.e., `year = 2010`). ```{r national_prep, results = 'hide'} -us <- tigris::states() -n51 <- c("Commonwealth of the Northern Mariana Islands", "Guam", "American Samoa", - "Puerto Rico", "United States Virgin Islands") +us <- states() +n51 <- c( + 'Commonwealth of the Northern Mariana Islands', + 'Guam', + 'American Samoa', + 'Puerto Rico', + 'United States Virgin Islands' +) y51 <- us$STUSPS[!(us$NAME %in% n51)] start_time <- Sys.time() # record start time -powell_wiley2017US <- ndi::powell_wiley(state = y51, year = 2017) +powell_wiley2017US <- powell_wiley(state = y51, year = 2017) end_time <- Sys.time() # record end time time_srr <- end_time - start_time # Calculate run time ``` ```{r national_hist, fig.height = 7, fig.width = 7} -ggplot2::ggplot(powell_wiley2017US$ndi, - ggplot2::aes(x = NDI)) + - ggplot2::geom_histogram(color = "black", - fill = "white") + - ggplot2::theme_minimal() + - ggplot2::ggtitle("Histogram of US-standardized NDI (Powell-Wiley) values (2013-2017)", - subtitle = "U.S. census tracts as the referent (including AK, HI, and DC)") +ggplot(powell_wiley2017US$ndi, aes(x = NDI)) + + geom_histogram(color = 'black', fill = 'white') + + theme_minimal() + + ggtitle( + 'Histogram of US-standardized NDI (Powell-Wiley) values (2013-2017)', + subtitle = 'U.S. census tracts as the referent (including AK, HI, and DC)' + ) ``` -The process to compute a US-standardized NDI (Powell-Wiley) took about `r round(time_srr, digits = 1)` minutes to run on a machine with the features listed at the end of the vignette. +The process to compute a US-standardized *NDI* (Powell-Wiley) took about `r round(time_srr, digits = 1)` minutes to run on a machine with the features listed at the end of the vignette. 
### Additional metrics socio-economic deprivation and disparity -Since version v0.1.1, the `ndi` package can compute additional metrics of socio-economic deprivation and disparity beyond neighborhood deprivation indices, including: +Since version v0.1.1, the [*ndi*](https://CRAN.R-project.org/package=ndi) package can compute additional metrics of socio-economic deprivation and disparity beyond neighborhood deprivation indices with data from the ACS-5, including: -1. `anthopolos()` function that computes the Racial Isolation Index (RI) based on [Anthopolos et al. (2011)](https://www.doi.org/10.1016/j.sste.2011.06.002) with data from the ACS-5. -2. `bravo()` function that computes the Educational Isolation Index (EI) based on [Bravo et al. (2021)](https://www.doi.org/10.3390/ijerph18179384) with data from the ACS-5. -3. `gini()` function that retrieves the Gini Index based on [Gini (1921)](https://www.doi.org/10.2307/2223319) from the ACS-5. -4. `krieger()` function that computes the Index of Concentration at the Extremes based on based on [Feldman et al. (2015)](https://www.doi.org/10.1136/jech-2015-205728) and [Krieger et al. (2016)](https://www.doi.org/10.2105/AJPH.2015.302955) with data from the ACS-5. 5. `duncan()` function that computes the Dissimilarity Index based on on [Duncan & Duncan (1955)](https://doi.org/10.2307/2088328) with data from the ACS-5. -6. `atkinson()` function that computes the Atkinson Index based on on [Atkinson (1970)](https://doi.org/10.1016/0022-0531(70)90039-6) with data from the ACS-5. +1. `anthopolos()` function that computes the Racial Isolation Index (*RI*) based on [Anthopolos et al. (2011)](https://doi.org/10.1016/j.sste.2011.06.002) +2. `bravo()` function that computes the Educational Isolation Index (*EI*) based on [Bravo et al. (2021)](https://doi.org/10.3390/ijerph18179384) +3. `gini()` function that retrieves the Gini Index (*G*) based on [Gini (1921)](https://doi.org/10.2307/2223319) +4. 
`krieger()` function that computes the Index of Concentration at the Extremes (*ICE*) based on [Feldman et al. (2015)](https://doi.org/10.1136/jech-2015-205728) and [Krieger et al. (2016)](https://doi.org/10.2105/AJPH.2015.302955) +5. `duncan()` function that computes the Dissimilarity Index (*D*) based on [Duncan & Duncan (1955)](https://doi.org/10.2307/2088328) +6. `atkinson()` function that computes the Atkinson Index (*A*) based on [Atkinson (1970)](https://doi.org/10.1016/0022-0531(70)90039-6) +7. `bell()` function that computes the aspatial racial/ethnic Interaction Index (_xPy\*_) based on Shevky & Williams (1949; ISBN-13:978-0-837-15637-8) and [Bell (1954)](https://doi.org/10.2307/2574118) +8. `white()` function that computes the aspatial racial/ethnic Correlation Ratio (*V*) based on [Bell (1954)](https://doi.org/10.2307/2574118) and [White (1986)](https://doi.org/10.2307/3644339) +9. `sudano()` function that computes the aspatial racial/ethnic Location Quotient (*LQ*) based on [Merton (1939)](https://doi.org/10.2307/2084686) and [Sudano et al. (2013)](https://doi.org/10.1016/j.healthplace.2012.09.015) +10. `bemanian_beyer()` function that computes the aspatial racial/ethnic Local Exposure and Isolation (*LEx/Is*) metric based on [Bemanian & Beyer (2017)](https://doi.org/10.1158/1055-9965.EPI-16-0926) +11. `hoover()` function that computes the aspatial racial/ethnic Delta (*DEL*) based on [Hoover (1941)](https://doi.org/10.1017/S0022050700052980) and Duncan et al. (1961; LC:60007089) +12. `white_blau()` function that computes an index of spatial proximity (*SP*) based on [White (1986)](https://doi.org/10.2307/3644339) and Blau (1977; ISBN-13:978-0-029-03660-0) +13.
`lieberson()` function that computes the aspatial racial/ethnic Isolation Index (_xPx\*_) based on Lieberson (1981; ISBN-13:978-1-032-53884-6) and [Bell (1954)](https://doi.org/10.2307/2574118) -#### Compute Racial Isolation Index (RI) +#### Compute Racial Isolation Index (*RI*) -Compute the RI (Anthopolos) values (2006-2010 5-year ACS) for North Carolina, U.S.A., census tracts. This metric is based on [Anthopolos et al. (2011)](https://www.doi.org/10.1016/j.sste.2011.06.002) that assessed the racial isolation of the population that identifies as non-Hispanic or Latino, Black or African American alone. Multiple racial/ethnic subgroups are available in the `anthopolos()` function, including: +Compute the spatial *RI* values (2006-2010 5-year ACS) for North Carolina, U.S.A., census tracts. This metric is based on [Anthopolos et al. (2011)](https://doi.org/10.1016/j.sste.2011.06.002) that assessed the racial isolation of the population that identifies as non-Hispanic or Latino, Black or African American alone. Multiple racial/ethnic subgroups are available in the `anthopolos()` function, including: | ACS table source | racial/ethnic subgroup | character for `subgroup` argument | | -------------- | ------------- | ---------------- | @@ -486,86 +576,102 @@ Compute the RI (Anthopolos) values (2006-2010 5-year ACS) for North Carolina, U. | B03002_020 | Hispanic or Latino, two races including some other race | HoLTRiSOR | | B03002_021 | Hispanic or Latino, two races excluding some other race, and three or more races | HoLTReSOR | -A census geography (and its neighbors) that has nearly all of its population who identify with the specified race/ethnicity subgroup(s) (e.g., Not Hispanic or Latino, Black or African American alone) will have an RI value close to 1. 
In contrast, a census geography (and its neighbors) that is nearly none of its population who identify with the specified race/ethnicity subgroup(s) (e.g., not Not Hispanic or Latino, Black or African American alone) will have an RI value close to 0. +A census geography (and its neighbors) that has nearly all of its population who identify with the specified race/ethnicity subgroup(s) (e.g., Not Hispanic or Latino, Black or African American alone) will have an *RI* value close to 1. In contrast, a census geography (and its neighbors) that is nearly none of its population who identify with the specified race/ethnicity subgroup(s) (e.g., not Not Hispanic or Latino, Black or African American alone) will have an *RI* value close to 0. ```{r anthopolos_prep, results = 'hide'} -anthopolos2010NC <- ndi::anthopolos(state = "NC", year = 2010, subgroup = "NHoLB") +anthopolos2010NC <- anthopolos(state = 'NC', year = 2010, subgroup = 'NHoLB') -# Obtain the 2010 census tracts from the "tigris" package -tract2010NC <- tigris::tracts(state = "NC", year = 2010, cb = TRUE) +# Obtain the 2010 census tracts from the 'tigris' package +tract2010NC <- tracts(state = 'NC', year = 2010, cb = TRUE) # Remove first 9 characters from GEOID for compatibility with tigris information tract2010NC$GEOID <- substring(tract2010NC$GEO_ID, 10) -# Obtain the 2010 counties from the "tigris" package -county2010NC <- tigris::counties(state = "NC", year = 2010, cb = TRUE) +# Obtain the 2010 counties from the 'tigris' package +county2010NC <- counties(state = 'NC', year = 2010, cb = TRUE) -# Join the RI (Anthopolos) values to the census tract geometry -NC2010anthopolos <- dplyr::left_join(tract2010NC, anthopolos2010NC$ri, by = "GEOID") +# Join the RI values to the census tract geometry +NC2010anthopolos <- tract2010NC %>% + left_join(anthopolos2010NC$ri, by = 'GEOID') ``` ```{r anthopolos_plot, fig.height = 4, fig.width = 7} -# Visualize the RI (Anthopolos) values (2006-2010 5-year ACS) for North Carolina, 
U.S.A., census tracts -ggplot2::ggplot() + - ggplot2::geom_sf(data = NC2010anthopolos, - ggplot2::aes(fill = RI), - size = 0.05, - color = "transparent") + - ggplot2::geom_sf(data = county2010NC, - fill = "transparent", - color = "white", - size = 0.2) + - ggplot2::theme_minimal() + - ggplot2::scale_fill_viridis_c() + - ggplot2::labs(fill = "Index (Continuous)", - caption = "Source: U.S. Census ACS 2006-2010 estimates") + - ggplot2::ggtitle("Racial Isolation Index (Anthopolos), non-Hispanic Black", - subtitle = "NC census tracts (not corrected for edge effects)") -``` - -The current version of the `ndi` package does not correct for edge effects (e.g., census geographies along the specified spatial extent border, coastline, or U.S.-Mexico / U.S.-Canada border) may have few neighboring census geographies, and RI values in these census geographies may be unstable. A stop-gap solution for the former source of edge effect is to compute the RI for neighboring census geographies (i.e., the states bordering a study area of interest) and then use the estimates of the study area of interest. +# Visualize the RI values (2006-2010 5-year ACS) for North Carolina, U.S.A., census tracts +ggplot() + + geom_sf( + data = NC2010anthopolos, + aes(fill = RI), + size = 0.05, + color = 'transparent' + ) + + geom_sf( + data = county2010NC, + fill = 'transparent', + color = 'white', + size = 0.2 + ) + + theme_minimal() + + scale_fill_viridis_c() + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. 
Census ACS 2006-2010 estimates') + + ggtitle( + 'Racial Isolation Index (Anthopolos), non-Hispanic Black', + subtitle = 'NC census tracts (not corrected for edge effects)' + ) +``` + +The current version of the [*ndi*](https://CRAN.R-project.org/package=ndi) package does not correct for edge effects (e.g., census geographies along the specified spatial extent border, coastline, or U.S.-Mexico / U.S.-Canada border) may have few neighboring census geographies, and *RI* values in these census geographies may be unstable. A stop-gap solution for the former source of edge effect is to compute the *RI* for neighboring census geographies (i.e., the states bordering a study area of interest) and then use the estimates of the study area of interest. ```{r anthopolos_edge_prep, results = 'hide'} # Compute RI for all census tracts in neighboring states -anthopolos2010GNSTV <- ndi::anthopolos(state = c("GA", "NC", "SC", "TN", "VA"), - year = 2010, subgroup = "NHoLB") +anthopolos2010GNSTV <- anthopolos( + state = c('GA', 'NC', 'SC', 'TN', 'VA'), + year = 2010, + subgroup = 'NHoLB' +) # Crop to only North Carolina, U.S.A. 
census tracts -anthopolos2010NCe <- anthopolos2010GNSTV$ri[anthopolos2010GNSTV$ri$GEOID %in% anthopolos2010NC$ri$GEOID, ] +anthopolos2010NCe <- anthopolos2010GNSTV$ri[anthopolos2010GNSTV$ri$GEOID %in% + anthopolos2010NC$ri$GEOID, ] -# Obtain the 2010 census tracts from the "tigris" package -tract2010NC <- tigris::tracts(state = "NC", year = 2010, cb = TRUE) +# Obtain the 2010 census tracts from the 'tigris' package +tract2010NC <- tracts(state = 'NC', year = 2010, cb = TRUE) # Remove first 9 characters from GEOID for compatibility with tigris information tract2010NC$GEOID <- substring(tract2010NC$GEO_ID, 10) -# Obtain the 2010 counties from the "tigris" package -county2010NC <- tigris::counties(state = "NC", year = 2010, cb = TRUE) +# Obtain the 2010 counties from the 'tigris' package +county2010NC <- counties(state = 'NC', year = 2010, cb = TRUE) -# Join the RI (Anthopolos) values to the census tract geometry -edgeNC2010anthopolos <- dplyr::left_join(tract2010NC, anthopolos2010NCe, by = "GEOID") +# Join the RI values to the census tract geometry +edgeNC2010anthopolos <- tract2010NC %>% + left_join(anthopolos2010NCe, by = 'GEOID') ``` ```{r anthopolos_edge_plot, fig.height = 4, fig.width = 7} -# Visualize the RI (Anthopolos) values (2006-2010 5-year ACS) for North Carolina, U.S.A., census tracts -ggplot2::ggplot() + - ggplot2::geom_sf(data = edgeNC2010anthopolos, - ggplot2::aes(fill = RI), - size = 0.05, - color = "transparent") + - ggplot2::geom_sf(data = county2010NC, - fill = "transparent", - color = "white", - size = 0.2) + - ggplot2::theme_minimal() + - ggplot2::scale_fill_viridis_c() + - ggplot2::labs(fill = "Index (Continuous)", - caption = "Source: U.S. 
Census ACS 2006-2010 estimates") + - ggplot2::ggtitle("Racial Isolation Index (Anthopolos), non-Hispanic Black", - subtitle = "NC census tracts (corrected for interstate edge effects)") -``` - -#### Compute Educational Isolation Index (EI) - -Compute the EI (Bravo) values (2006-2010 5-year ACS) for North Carolina, U.S.A., census tracts. This metric is based on [Bravo et al. (2021)](https://www.doi.org/10.3390/ijerph18179384) that assessed the educational isolation of the population without a four-year college degree. Multiple educational attainment categories are available in the `bravo()` function, including: +# Visualize the RI values (2006-2010 5-year ACS) for North Carolina, U.S.A., census tracts +ggplot() + + geom_sf( + data = edgeNC2010anthopolos, + aes(fill = RI), + size = 0.05, + color = 'transparent' + ) + + geom_sf( + data = county2010NC, + fill = 'transparent', + color = 'white', + size = 0.2 + ) + + theme_minimal() + + scale_fill_viridis_c() + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2006-2010 estimates') + + ggtitle( + 'Racial Isolation Index (Anthopolos), non-Hispanic Black', + subtitle = 'NC census tracts (corrected for interstate edge effects)' + ) +``` + +#### Compute Educational Isolation Index (*EI*) + +Compute the spatial *EI* (Bravo) values (2006-2010 5-year ACS) for Oklahoma, U.S.A., census tracts. This metric is based on [Bravo et al. (2021)](https://doi.org/10.3390/ijerph18179384) that assessed the educational isolation of the population without a four-year college degree. 
Multiple educational attainment categories are available in the `bravo()` function, including: | ACS table source | educational attainment category | character for `subgroup` argument | | -------------- | ------------- | ---------------- | @@ -574,184 +680,235 @@ Compute the EI (Bravo) values (2006-2010 5-year ACS) for North Carolina, U.S.A., | B06009_004 | some college or associate's degree | SCoAD | | B06009_005 | Bachelor's degree | BD | | B06009_006 | graduate or professional degree | GoPD | -Note: The ACS-5 data (2005-2009) uses the "B15002" question. +Note: The ACS-5 data (2005-2009) uses the 'B15002' question. -A census geography (and its neighbors) that has nearly all of its population with the specified educational attainment category (e.g., a four-year college degree or more) will have an EI value close to 1. In contrast, a census geography (and its neighbors) that is nearly none of its population with the specified educational attainment category (e.g., with a four-year college degree) will have an EI value close to 0. +A census geography (and its neighbors) that has nearly all of its population with the specified educational attainment category (e.g., a four-year college degree or more) will have an *EI* (Bravo) value close to 1. In contrast, a census geography (and its neighbors) that is nearly none of its population with the specified educational attainment category (e.g., with a four-year college degree) will have an *EI* (Bravo) value close to 0. 
```{r bravo_prep, results = 'hide'} -bravo2010NC <- ndi::bravo(state = "NC", year = 2010, subgroup = c("LtHS", "HSGiE", "SCoAD")) +bravo2010OK <- bravo(state = 'OK', year = 2010, subgroup = c('LtHS', 'HSGiE', 'SCoAD')) -# Obtain the 2010 census tracts from the "tigris" package -tract2010NC <- tigris::tracts(state = "NC", year = 2010, cb = TRUE) +# Obtain the 2010 census tracts from the 'tigris' package +tract2010OK <- tracts(state = 'OK', year = 2010, cb = TRUE) # Remove first 9 characters from GEOID for compatibility with tigris information -tract2010NC$GEOID <- substring(tract2010NC$GEO_ID, 10) +tract2010OK$GEOID <- substring(tract2010OK$GEO_ID, 10) -# Obtain the 2010 counties from the "tigris" package -county2010NC <- tigris::counties(state = "NC", year = 2010, cb = TRUE) +# Obtain the 2010 counties from the 'tigris' package +county2010OK <- counties(state = 'OK', year = 2010, cb = TRUE) -# Join the RI (Bravo) values to the census tract geometry -NC2010bravo <- dplyr::left_join(tract2010NC, bravo2010NC$ei, by = "GEOID") +# Join the EI (Bravo) values to the census tract geometry +OK2010bravo <- tract2010OK %>% + left_join(bravo2010OK$ei, by = 'GEOID') ``` ```{r bravo_plot, fig.height = 4, fig.width = 7} -# Visualize the RI (Bravo) values (2006-2010 5-year ACS) for North Carolina, U.S.A., census tracts -ggplot2::ggplot() + - ggplot2::geom_sf(data = NC2010bravo, - ggplot2::aes(fill = EI), - size = 0.05, - color = "transparent") + - ggplot2::geom_sf(data = county2010NC, - fill = "transparent", - color = "white", - size = 0.2) + - ggplot2::theme_minimal() + - ggplot2::scale_fill_viridis_c() + - ggplot2::labs(fill = "Index (Continuous)", - caption = "Source: U.S. 
Census ACS 2006-2010 estimates") + - ggplot2::ggtitle("Educational Isolation Index (Bravo), without a four-year college degree", - subtitle = "NC census tracts (not corrected for edge effects)") +# Visualize the EI (Bravo) values (2006-2010 5-year ACS) for Oklahoma, U.S.A., census tracts +ggplot() + + geom_sf( + data = OK2010bravo, + aes(fill = EI), + size = 0.05, + color = 'transparent' + ) + + geom_sf( + data = county2010OK, + fill = 'transparent', + color = 'white', + size = 0.2 + ) + + theme_minimal() + + scale_fill_viridis_c(limits = c(0, 1)) + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2006-2010 estimates') + + ggtitle( + 'Educational Isolation Index (Bravo), without a four-year college degree', + subtitle = 'OK census tracts (not corrected for edge effects)' + ) ``` -Can correct one source of edge effect in the same manner as shown for the RI (Anthopolos) metric. +Can correct one source of edge effect in the same manner as shown for the *RI* metric. -#### Retrieve the Gini Index +#### Retrieve the Gini Index (*G*) -Retrieve the Gini Index values (2006-2010 5-year ACS) for North Carolina, U.S.A., census tracts. This metric is based on [Gini (1921)](https://www.doi.org/10.2307/2223319), and the `gini()` function retrieves the estimate from the ACS-5. +Retrieve the aspatial Gini Index (*G*) values (2006-2010 5-year ACS) for Massachusetts, U.S.A., census tracts. This metric is based on [Gini (1921)](https://doi.org/10.2307/2223319), and the `gini()` function retrieves the estimate from the ACS-5. -According to the [U.S. Census Bureau](https://www.census.gov/topics/income-poverty/income-inequality/about/metrics/gini-index.html): "The Gini Index is a summary measure of income inequality. The Gini coefficient incorporates the detailed shares data into a single statistic, which summarizes the dispersion of income across the entire income distribution. 
The Gini coefficient ranges from 0, indicating perfect equality (where everyone receives an equal share), to 1, perfect inequality (where only one recipient or group of recipients receives all the income). The Gini is based on the difference between the Lorenz curve (the observed cumulative income distribution) and the notion of a perfectly equal income distribution." +According to the [U.S. Census Bureau](https://census.gov/topics/income-poverty/income-inequality/about/metrics/gini-index.html): 'The Gini Index is a summary measure of income inequality. The Gini coefficient incorporates the detailed shares data into a single statistic, which summarizes the dispersion of income across the entire income distribution. The Gini coefficient ranges from 0, indicating perfect equality (where everyone receives an equal share), to 1, perfect inequality (where only one recipient or group of recipients receives all the income). *G* is based on the difference between the Lorenz curve (the observed cumulative income distribution) and the notion of a perfectly equal income distribution.' 
```{r gini_prep, results = 'hide'} -gini2010NC <- ndi::gini(state = "NC", year = 2010) +gini2010MA <- gini(state = 'MA', year = 2010) -# Obtain the 2010 census tracts from the "tigris" package -tract2010NC <- tigris::tracts(state = "NC", year = 2010, cb = TRUE) +# Obtain the 2010 census tracts from the 'tigris' package +tract2010MA <- tracts(state = 'MA', year = 2010, cb = TRUE) # Remove first 9 characters from GEOID for compatibility with tigris information -tract2010NC$GEOID <- substring(tract2010NC$GEO_ID, 10) +tract2010MA$GEOID <- substring(tract2010MA$GEO_ID, 10) -# Obtain the 2010 counties from the "tigris" package -county2010NC <- tigris::counties(state = "NC", year = 2010, cb = TRUE) +# Obtain the 2010 counties from the 'tigris' package +county2010MA <- counties(state = 'MA', year = 2010, cb = TRUE) -# Join the Gini Index values to the census tract geometry -NC2010gini <- dplyr::left_join(tract2010NC, gini2010NC$gini, by = "GEOID") +# Join the G (Gini) values to the census tract geometry +MA2010gini <- tract2010MA %>% + left_join(gini2010MA$g, by = 'GEOID') ``` ```{r gini_plot, fig.height = 4, fig.width = 7} -# Visualize the Gini Index values (2006-2010 5-year ACS) for North Carolina, U.S.A., census tracts -ggplot2::ggplot() + - ggplot2::geom_sf(data = NC2010gini, - ggplot2::aes(fill = gini), - size = 0.05, - color = "transparent") + - ggplot2::geom_sf(data = county2010NC, - fill = "transparent", - color = "white", - size = 0.2) + - ggplot2::theme_minimal() + - ggplot2::scale_fill_viridis_c() + - ggplot2::labs(fill = "Index (Continuous)", - caption = "Source: U.S. Census ACS 2006-2010 estimates") + - ggplot2::ggtitle("Gini Index", - subtitle = "NC census tracts") -``` - -### Index of Concentration at the Extremes (ICE) - -Compute the Index of Concentration at the Extremes values (2006-2010 5-year ACS) for Wayne County, Michigan, U.S.A., census tracts. Wayne County is the home of Detroit, Michigan, a highly segregated city in the U.S. 
This metric is based on [Feldman et al. (2015)](https://www.doi.org/10.1136/jech-2015-205728) and [Krieger et al. (2016)](https://www.doi.org/10.2105/AJPH.2015.302955) who expanded the metric designed by Massey in a chapter of [Booth & Crouter (2001)](https://www.doi.org/10.4324/9781410600141) initially designed for residential segregation. The `krieger()` function computes five ICE metrics using the following ACS-5 groups: - -| ACS table group | ICE metric | Comparison +# Visualize the G (Gini) values (2006-2010 5-year ACS) for Massachusetts, U.S.A., census tracts +ggplot() + + geom_sf( + data = MA2010gini, + aes(fill = G), + size = 0.05, + color = 'transparent' + ) + + geom_sf( + data = county2010MA, + fill = 'transparent', + color = 'white', + size = 0.2 + ) + + theme_minimal() + + scale_fill_viridis_c() + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2006-2010 estimates') + + ggtitle('Gini Index', subtitle = 'MA census tracts') +``` + +### Index of Concentration at the Extremes (*ICE*) + +Compute the aspatial Index of Concentration at the Extremes values (2006-2010 5-year ACS) for Wayne County, Michigan, U.S.A., census tracts. Wayne County is the home of Detroit, Michigan, a highly segregated city in the U.S. This metric is based on [Feldman et al. (2015)](https://doi.org/10.1136/jech-2015-205728) and [Krieger et al. (2016)](https://doi.org/10.2105/AJPH.2015.302955) who expanded the metric designed by Massey in a chapter of [Booth & Crouter (2001)](https://doi.org/10.4324/9781410600141) initially designed for residential segregation. The `krieger()` function computes five *ICE* metrics using the following ACS-5 groups: + +| ACS table group | *ICE* metric | Comparison | | -------------- | ------------- | ---------------- | -| B19001 | Income, "ICE_inc"| 80th income percentile vs. 20th income percentile | -| B15002 | Education, "ICE_edu"| less than high school vs. 
four-year college degree or more | -| B03002 | Race/Ethnicity, "ICE_rewb"| 80th income percentile vs. 20th income percentile | -| B19001 & B19001B & B19001H | Income and race/ethnicity combined, "ICE_wbinc" | white non-Hispanic in 80th income percentile vs. black alone (including Hispanic) in 20th income percentile | -| B19001 & B19001H | Income and race/ethnicity combined, "ICE_wpcinc"| white non-Hispanic in 80th income percentile vs. white non-Hispanic in 20th income percentile | +| B19001 | Income, 'ICE_inc'| 80th income percentile vs. 20th income percentile | +| B15002 | Education, 'ICE_edu'| less than high school vs. four-year college degree or more | +| B03002 | Race/Ethnicity, 'ICE_rewb'| white non-Hispanic vs. black non-Hispanic | +| B19001 & B19001B & B19001H | Income and race/ethnicity combined, 'ICE_wbinc' | white non-Hispanic in 80th income percentile vs. black alone (including Hispanic) in 20th income percentile | +| B19001 & B19001H | Income and race/ethnicity combined, 'ICE_wpcinc'| white non-Hispanic in 80th income percentile vs. white non-Hispanic in 20th income percentile | -ICE metrics can range in value from −1 (most deprived) to 1 (most privileged). A value of 0 can thus represent two possibilities: (1) none of the residents are in the most privileged or most deprived categories, or (2) an equal number of persons are in the most privileged and most deprived categories, and in both cases indicates that the area is not dominated by extreme concentrations of either of the two groups. +*ICE* metrics can range in value from −1 (most deprived) to 1 (most privileged). A value of 0 can thus represent two possibilities: (1) none of the residents are in the most privileged or most deprived categories, or (2) an equal number of persons are in the most privileged and most deprived categories, and in both cases indicates that the area is not dominated by extreme concentrations of either of the two groups.
```{r krieger_prep, results = 'hide'} -ice2020WC <- krieger(state = "MI", county = "Wayne", year = 2010) +ice2020WC <- krieger(state = 'MI', county = 'Wayne', year = 2010) -# Obtain the 2010 census tracts from the "tigris" package -tract2010WC <- tigris::tracts(state = "MI", county = "Wayne", year = 2010, cb = TRUE) +# Obtain the 2010 census tracts from the 'tigris' package +tract2010WC <- tracts(state = 'MI', county = 'Wayne', year = 2010, cb = TRUE) # Remove first 9 characters from GEOID for compatibility with tigris information tract2010WC$GEOID <- substring(tract2010WC$GEO_ID, 10) -# Join the ICEs (Krieger) values to the census tract geometry -ice2020WC <- dplyr::left_join(tract2010WC, ice2020WC$ice, by = "GEOID") +# Join the ICE values to the census tract geometry +ice2020WC <- tract2010WC %>% + left_join(ice2020WC$ice, by = 'GEOID') ``` ```{r krieger_plot, fig.height = 5.5, fig.width = 7} # Plot ICE for Income -ggplot2::ggplot() + - ggplot2::geom_sf(data = ice2020WC, - ggplot2::aes(fill = ICE_inc), - color = "white", - size = 0.05) + - ggplot2::theme_bw() + - ggplot2::scale_fill_gradient2(low = "#998ec3", mid = "#f7f7f7", high = "#f1a340", limits = c(-1,1)) + - ggplot2::labs(fill = "Index (Continuous)", - caption = "Source: U.S. Census ACS 2006-2010 estimates")+ - ggplot2::ggtitle("Index of Concentration at the Extremes\nIncome (Krieger)", - subtitle = "80th income percentile vs. 20th income percentile") +ggplot() + + geom_sf( + data = ice2020WC, + aes(fill = ICE_inc), + color = 'white', + size = 0.05 + ) + + theme_bw() + + scale_fill_gradient2( + low = '#998ec3', + mid = '#f7f7f7', + high = '#f1a340', + limits = c(-1, 1) + ) + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2006-2010 estimates') + + ggtitle( + 'Index of Concentration at the Extremes\nIncome (Krieger)', + subtitle = '80th income percentile vs. 
20th income percentile' + ) # Plot ICE for Education -ggplot2::ggplot() + - ggplot2::geom_sf(data = ice2020WC, - ggplot2::aes(fill = ICE_edu), - color = "white", - size = 0.05) + - ggplot2::theme_bw() + - ggplot2::scale_fill_gradient2(low = "#998ec3", mid = "#f7f7f7", high = "#f1a340", limits = c(-1,1)) + - ggplot2::labs(fill = "Index (Continuous)", - caption = "Source: U.S. Census ACS 2006-2010 estimates")+ - ggplot2::ggtitle("Index of Concentration at the Extremes\nEducation (Krieger)", - subtitle = "less than high school vs. four-year college degree or more") +ggplot() + + geom_sf( + data = ice2020WC, + aes(fill = ICE_edu), + color = 'white', + size = 0.05 + ) + + theme_bw() + + scale_fill_gradient2( + low = '#998ec3', + mid = '#f7f7f7', + high = '#f1a340', + limits = c(-1, 1) + ) + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2006-2010 estimates') + + ggtitle( + 'Index of Concentration at the Extremes\nEducation (Krieger)', + subtitle = 'less than high school vs. four-year college degree or more' + ) # Plot ICE for Race/Ethnicity -ggplot2::ggplot() + - ggplot2::geom_sf(data = ice2020WC, - ggplot2::aes(fill = ICE_rewb), - color = "white", - size = 0.05) + - ggplot2::theme_bw() + - ggplot2::scale_fill_gradient2(low = "#998ec3", mid = "#f7f7f7", high = "#f1a340", limits = c(-1, 1)) + - ggplot2::labs(fill = "Index (Continuous)", - caption = "Source: U.S. Census ACS 2006-2010 estimates")+ - ggplot2::ggtitle("Index of Concentration at the Extremes\nRace/Ethnicity (Krieger)", - subtitle = "white non-Hispanic vs. black non-Hispanic") +ggplot() + + geom_sf( + data = ice2020WC, + aes(fill = ICE_rewb), + color = 'white', + size = 0.05 + ) + + theme_bw() + + scale_fill_gradient2( + low = '#998ec3', + mid = '#f7f7f7', + high = '#f1a340', + limits = c(-1, 1) + ) + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. 
Census ACS 2006-2010 estimates') + + ggtitle( + 'Index of Concentration at the Extremes\nRace/Ethnicity (Krieger)', + subtitle = 'white non-Hispanic vs. black non-Hispanic' + ) # Plot ICE for Income and Race/Ethnicity Combined -## white non-Hispanic in 80th income percentile vs. black (including Hispanic) in 20th income percentile -ggplot2::ggplot() + - ggplot2::geom_sf(data = ice2020WC, - ggplot2::aes(fill = ICE_wbinc), - color = "white", - size = 0.05) + - ggplot2::theme_bw() + - ggplot2::scale_fill_gradient2(low = "#998ec3", mid = "#f7f7f7", high = "#f1a340", limits = c(-1, 1)) + - ggplot2::labs(fill = "Index (Continuous)", - caption = "Source: U.S. Census ACS 2006-2010 estimates")+ - ggplot2::ggtitle("Index of Concentration at the Extremes\nIncome & race/ethnicity combined (Krieger)", - subtitle = "white non-Hispanic in 80th inc ptcl vs. black alone in 20th inc pctl") +## white non-Hispanic in 80th income percentile vs. +## black (including Hispanic) in 20th income percentile +ggplot() + + geom_sf( + data = ice2020WC, + aes(fill = ICE_wbinc), + color = 'white', + size = 0.05 + ) + + theme_bw() + + scale_fill_gradient2( + low = '#998ec3', + mid = '#f7f7f7', + high = '#f1a340', + limits = c(-1, 1) + ) + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2006-2010 estimates') + + ggtitle( + 'Index of Concentration at the Extremes\nIncome & race/ethnicity combined (Krieger)', + subtitle = 'white non-Hispanic in 80th inc ptcl vs. black alone in 20th inc pctl' + ) # Plot ICE for Income and Race/Ethnicity Combined ## white non-Hispanic in 80th income percentile vs. white non-Hispanic in 20th income percentile -ggplot2::ggplot() + - ggplot2::geom_sf(data = ice2020WC, - ggplot2::aes(fill = ICE_wpcinc), - color = "white", - size = 0.05) + - ggplot2::theme_bw() + - ggplot2::scale_fill_gradient2(low = "#998ec3", mid = "#f7f7f7", high = "#f1a340", limits = c(-1, 1)) + - ggplot2::labs(fill = "Index (Continuous)", - caption = "Source: U.S. 
Census ACS 2006-2010 estimates")+ - ggplot2::ggtitle("Index of Concentration at the Extremes\nIncome & race/ethnicity combined (Krieger)", - subtitle = "white non-Hispanic (WNH) in 80th inc pctl vs. WNH in 20th inc pctl") +ggplot() + + geom_sf( + data = ice2020WC, + aes(fill = ICE_wpcinc), + color = 'white', + size = 0.05 + ) + + theme_bw() + + scale_fill_gradient2( + low = '#998ec3', + mid = '#f7f7f7', + high = '#f1a340', + limits = c(-1, 1) + ) + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2006-2010 estimates') + + ggtitle( + 'Index of Concentration at the Extremes\nIncome & race/ethnicity combined (Krieger)', + subtitle = 'white non-Hispanic (WNH) in 80th inc pctl vs. WNH in 20th inc pctl' + ) ``` -#### Compute racial/ethnic Dissimilarity Index (DI) +#### Compute racial/ethnic Dissimilarity Index (*D*) -Compute the DI (Duncan) values (2006-2010 5-year ACS) for Pennsylvania, U.S.A., counties from census tracts. This metric is based on [Duncan & Duncan (1955)](https://doi.org/10.2307/2088328) that assessed the racial/ethnic isolation of students that identify as non-Hispanic or Latino, Black or African American alone compared to students that identify as non-Hispanic or Latino, white alone between schools and school districts. Multiple racial/ethnic subgroups are available in the `duncan()` function, including: +Compute the aspatial racial/ethnic *D* values (2006-2010 5-year ACS) for Pennsylvania, U.S.A., counties from census tracts. This metric is based on [Duncan & Duncan (1955)](https://doi.org/10.2307/2088328) that assessed the racial/ethnic isolation of students that identify as non-Hispanic or Latino, Black or African American alone compared to students that identify as non-Hispanic or Latino, white alone between schools and school districts. 
Multiple racial/ethnic subgroups are available in the `duncan()` function, including: | ACS table source | racial/ethnic subgroup | character for `subgroup` or `subgroup_ref` arguments | | -------------- | ------------- | ---------------- | @@ -776,43 +933,55 @@ Compute the DI (Duncan) values (2006-2010 5-year ACS) for Pennsylvania, U.S.A., | B03002_020 | Hispanic or Latino, two races including some other race | HoLTRiSOR | | B03002_021 | Hispanic or Latino, two races excluding some other race, and three or more races | HoLTReSOR | -DI is a measure of the evenness of racial/ethnic residential segregation when comparing smaller geographical areas to larger ones within which the smaller geographical areas are located. The DI metric can range in value from 0 to 1 and represents the proportion of racial/ethnic subgroup members that would have to change their area of residence to achieve an even distribution within the larger geographical area under conditions of maximum segregation. +*D* is a measure of the evenness of racial/ethnic residential segregation when comparing smaller geographical areas to larger ones within which the smaller geographical areas are located. *D* can range in value from 0 to 1 and represents the proportion of racial/ethnic subgroup members that would have to change their area of residence to achieve an even distribution within the larger geographical area under conditions of maximum segregation. 
```{r duncan_prep, results = 'hide'} -duncan2010PA <- ndi::duncan(geo_large = "county", geo_small = "tract", state = "PA", - year = 2010, subgroup = "NHoLB", subgroup_ref = "NHoLW") - -# Obtain the 2010 census counties from the "tigris" package -county2010PA <- tigris::counties(state = "PA", year = 2010, cb = TRUE) +duncan2010PA <- duncan( + geo_large = 'county', + geo_small = 'tract', + state = 'PA', + year = 2010, + subgroup = 'NHoLB', + subgroup_ref = 'NHoLW' +) + +# Obtain the 2010 census counties from the 'tigris' package +county2010PA <- counties(state = 'PA', year = 2010, cb = TRUE) # Remove first 9 characters from GEOID for compatibility with tigris information county2010PA$GEOID <- substring(county2010PA$GEO_ID, 10) -# Join the DI (Duncan) values to the county geometry -PA2010duncan <- dplyr::left_join(county2010PA, duncan2010PA$di, by = "GEOID") +# Join the D values to the county geometry +PA2010duncan <- county2010PA %>% + left_join(duncan2010PA$d, by = 'GEOID') ``` ```{r duncan_plot, fig.height = 4, fig.width = 7} -# Visualize the DI (Duncan) values (2006-2010 5-year ACS) for Pennsylvania, U.S.A., counties -ggplot2::ggplot() + - ggplot2::geom_sf(data = PA2010duncan, - ggplot2::aes(fill = DI), - size = 0.05, - color = "white") + - ggplot2::geom_sf(data = county2010PA, - fill = "transparent", - color = "white", - size = 0.2) + - ggplot2::theme_minimal() + - ggplot2::scale_fill_viridis_c(limits = c(0, 1)) + - ggplot2::labs(fill = "Index (Continuous)", - caption = "Source: U.S. Census ACS 2006-2010 estimates") + - ggplot2::ggtitle("Dissimilarity Index (Duncan)\nPennsylvania census tracts to counties", - subtitle = "Black non-Hispanic vs. white non-Hispanic") -``` - -#### Compute Atkinson Index (AI) - -Compute the AI (Atkinson) values (2017-2021 5-year ACS) for Kentucky, U.S.A., counties from census block groups. 
This metric is based on [Atkinson (1970)](https://doi.org/10.2307/2088328) that assessed the distribution of income within 12 counties but has since been adapted to study racial/ethnic segregation (see [James & Taeuber 1985](https://doi.org/10.2307/270845)). Multiple racial/ethnic subgroups are available in the `atkinson()` function, including: +# Visualize the D values (2006-2010 5-year ACS) for Pennsylvania, U.S.A., counties +ggplot() + + geom_sf( + data = PA2010duncan, + aes(fill = D), + size = 0.05, + color = 'white' + ) + + geom_sf( + data = county2010PA, + fill = 'transparent', + color = 'white', + size = 0.2 + ) + + theme_minimal() + + scale_fill_viridis_c(limits = c(0, 1)) + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2006-2010 estimates') + + ggtitle( + 'Dissimilarity Index (Duncan & Duncan)\nPennsylvania census tracts to counties', + subtitle = 'Black non-Hispanic vs. white non-Hispanic' + ) +``` + +#### Compute aspatial income or racial/ethnic Atkinson Index (*A*) + +Compute the aspatial income or racial/ethnic *A* values (2017-2021 5-year ACS) for Kentucky, U.S.A., counties from census block groups. This metric is based on [Atkinson (1970)](https://doi.org/10.1016/0022-0531(70)90039-6) that assessed the distribution of income within 12 countries but has since been adapted to study racial/ethnic segregation (see [James & Taeuber 1985](https://doi.org/10.2307/270845)). To compare median household income, specify `subgroup = 'MedHHInc'` which will use the ACS-5 variable 'B19013_001' in the computation. 
Multiple racial/ethnic subgroups are available in the `atkinson()` function, including: | ACS table source | racial/ethnic subgroup | character for `subgroup` argument | | -------------- | ------------- | ---------------- | @@ -837,40 +1006,588 @@ Compute the AI (Atkinson) values (2017-2021 5-year ACS) for Kentucky, U.S.A., co | B03002_020 | Hispanic or Latino, two races including some other race | HoLTRiSOR | | B03002_021 | Hispanic or Latino, two races excluding some other race, and three or more races | HoLTReSOR | -To compare median household income, specify `subgroup = "MedHHInc"` which will use the ACS-5 variable "B19013_001" in the computation. +*A* is a measure of the inequality and, in the context of residential race/ethnicity, segregation when comparing smaller geographical areas to larger ones within which the smaller geographical areas are located. *A* can range in value from 0 to 1 and smaller values of the index indicate lower levels of inequality (e.g., less segregation). -AI is a measure of the inequality and, in the context of residential race/ethnicity, segregation when comparing smaller geographical areas to larger ones within which the smaller geographical areas are located. The AI metric can range in value from 0 to 1 and smaller values of the index indicate lower levels of inequality (e.g., less segregation). - -AI is sensitive to the choice of `epsilon` argument or the shape parameter that determines how to weight the increments to inequality (segregation) contributed by different proportions of the Lorenz curve. A user must explicitly decide how heavily to weight smaller geographical units at different points on the Lorenz curve (i.e., whether the index should take greater account of differences among areas of over- or under-representation). The `epsilon` argument must have values between 0 and 1.0. 
For `0 <= epsilon < 0.5` or less "inequality-averse," smaller geographical units with a subgroup proportion smaller than the subgroup proportion of the larger geographical unit contribute more to inequality ("over-representation"). For `0.5 < epsilon <= 1.0` or more "inequality-averse," smaller geographical units with a subgroup proportion larger than the subgroup proportion of the larger geographical unit contribute more to inequality ("under-representation"). If `epsilon = 0.5` (the default), units of over- and under-representation contribute equally to the index. See Section 2.3 of [Saint-Jacques et al. (2020)](https://www.doi.org/10.48550/arXiv.2002.05819) for one method to select `epsilon`. We choose `epsilon = 0.67` in the example below: +*A* is sensitive to the choice of `epsilon` argument or the shape parameter that determines how to weight the increments to inequality (segregation) contributed by different proportions of the Lorenz curve. A user must explicitly decide how heavily to weight smaller geographical units at different points on the Lorenz curve (i.e., whether the index should take greater account of differences among areas of over- or under-representation). The `epsilon` argument must have values between 0 and 1.0. For `0 <= epsilon < 0.5` or less 'inequality-averse,' smaller geographical units with a subgroup proportion smaller than the subgroup proportion of the larger geographical unit contribute more to inequality ('over-representation'). For `0.5 < epsilon <= 1.0` or more 'inequality-averse,' smaller geographical units with a subgroup proportion larger than the subgroup proportion of the larger geographical unit contribute more to inequality ('under-representation'). If `epsilon = 0.5` (the default), units of over- and under-representation contribute equally to the index. See Section 2.3 of [Saint-Jacques et al. (2020)](https://doi.org/10.48550/arXiv.2002.05819) for one method to select `epsilon`. 
We choose `epsilon = 0.67` in the example below: ```{r atkinson_prep, results = 'hide'} -atkinson2021KY <- ndi::atkinson(geo_large = "county", geo_small = "block group", state = "KY", - year = 2021, subgroup = "NHoLB", epsilon = 0.67) +atkinson2021KY <- atkinson( + geo_large = 'county', + geo_small = 'block group', + state = 'KY', + year = 2021, + subgroup = 'NHoLB', + epsilon = 0.67 +) + +# Obtain the 2021 census counties from the 'tigris' package +county2021KY <- counties(state = 'KY', year = 2021, cb = TRUE) + +# Join the A values to the county geometry +KY2021atkinson <- county2021KY %>% + left_join(atkinson2021KY$a, by = 'GEOID') +``` -# Obtain the 2021 census counties from the "tigris" package -county2021KY <- tigris::counties(state = "KY", year = 2021, cb = TRUE) +```{r atkinson_plot, fig.height = 4, fig.width = 7} +# Visualize the A values (2017-2021 5-year ACS) for Kentucky, U.S.A., counties +ggplot() + + geom_sf( + data = KY2021atkinson, + aes(fill = A), + size = 0.05, + color = 'white' + ) + + geom_sf( + data = county2021KY, + fill = 'transparent', + color = 'white', + size = 0.2 + ) + + theme_minimal() + + scale_fill_viridis_c(limits = c(0, 1)) + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2017-2021 estimates') + + ggtitle( + 'Atkinson Index (Atkinson)\nKentucky census block groups to counties', + subtitle = expression(paste('Black non-Hispanic (', epsilon, ' = 0.67)')) + ) +``` -# Join the AI (Atkinson) values to the county geometry -KY2021atkinson <- dplyr::left_join(county2021KY, atkinson2021KY$ai, by = "GEOID") +#### Compute racial/ethnic Interaction Index (_xPy\*_) + +Compute the aspatial racial/ethnic _xPy\*_ values (2017-2021 5-year ACS) for Ohio, U.S.A., counties from census tracts. This metric is based on Shevky & Williams (1949; ISBN-13:978-0-837-15637-8) and adapted by [Bell (1954)](https://doi.org/10.2307/2574118). 
Multiple racial/ethnic subgroups are available in the `bell()` function, including: + +| ACS table source | racial/ethnic subgroup | character for `subgroup` or `subgroup_ixn` argument | +| -------------- | ------------- | ---------------- | +| B03002_002 | not Hispanic or Latino | NHoL | +| B03002_003 | not Hispanic or Latino, white alone | NHoLW | +| B03002_004 | not Hispanic or Latino, Black or African American alone | NHoLB | +| B03002_005 | not Hispanic or Latino, American Indian and Alaska Native alone | NHoLAIAN | +| B03002_006 | not Hispanic or Latino, Asian alone | NHoLA | +| B03002_007 | not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone | NHoLNHOPI | +| B03002_008 | not Hispanic or Latino, some other race alone | NHoLSOR | +| B03002_009 | not Hispanic or Latino, two or more races | NHoLTOMR | +| B03002_010 | not Hispanic or Latino, two races including some other race | NHoLTRiSOR | +| B03002_011 | not Hispanic or Latino, two races excluding some other race, and three or more races | NHoLTReSOR | +| B03002_012 | Hispanic or Latino | HoL | +| B03002_013 | Hispanic or Latino, white alone | HoLW | +| B03002_014 | Hispanic or Latino, Black or African American alone | HoLB | +| B03002_015 | Hispanic or Latino, American Indian and Alaska Native alone | HoLAIAN | +| B03002_016 | Hispanic or Latino, Asian alone | HoLA | +| B03002_017 | Hispanic or Latino, Native Hawaiian and other Pacific Islander alone | HoLNHOPI | +| B03002_018 | Hispanic or Latino, some other race alone | HoLSOR | +| B03002_019 | Hispanic or Latino, two or more races | HoLTOMR | +| B03002_020 | Hispanic or Latino, two races including some other race | HoLTRiSOR | +| B03002_021 | Hispanic or Latino, two races excluding some other race, and three or more races | HoLTReSOR | + +_xPy\*_ is some measure of the probability that a member of one subgroup(s) will meet or interact with a member of another subgroup(s) with higher values signifying higher probability of interaction 
(less isolation) when comparing smaller geographical areas to larger ones within which the smaller geographical areas are located. _xPy\*_ can range in value from 0 to 1. + +```{r bell_prep, results = 'hide'} +bell2021OH <- bell( + geo_large = 'county', + geo_small = 'tract', + state = 'OH', + year = 2021, + subgroup = 'NHoLB', + subgroup_ixn = 'NHoLW' +) + +# Obtain the 2021 census counties from the 'tigris' package +county2021OH <- counties(state = 'OH', year = 2021, cb = TRUE) + +# Join the xPy* values to the county geometry +OH2021bell <- county2021OH %>% + left_join(bell2021OH$xpy_star, by = 'GEOID') ``` -```{r atkinson_plot, fig.height = 4, fig.width = 7} -# Visualize the AI (Atkinson) values (2017-2021 5-year ACS) for Kentucky, U.S.A., counties -ggplot2::ggplot() + - ggplot2::geom_sf(data = KY2021atkinson, - ggplot2::aes(fill = AI), - size = 0.05, - color = "white") + - ggplot2::geom_sf(data = county2021KY, - fill = "transparent", - color = "white", - size = 0.2) + - ggplot2::theme_minimal() + - ggplot2::scale_fill_viridis_c(limits = c(0, 1)) + - ggplot2::labs(fill = "Index (Continuous)", - caption = "Source: U.S. Census ACS 2017-2021 estimates") + - ggplot2::ggtitle("Atkinson Index (Atkinson)\nKentucky census block groups to counties", - subtitle = expression(paste("Black non-Hispanic (", epsilon, " = 0.67)"))) +```{r bell_plot, fig.height = 6, fig.width = 7} +# Visualize the xPy* values (2017-2021 5-year ACS) for Ohio, U.S.A., counties +ggplot() + + geom_sf( + data = OH2021bell, + aes(fill = xPy_star), + size = 0.05, + color = 'white' + ) + + geom_sf( + data = county2021OH, + fill = 'transparent', + color = 'white', + size = 0.2 + ) + + theme_minimal() + + scale_fill_viridis_c(limits = c(0, 1)) + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2017-2021 estimates') + + ggtitle( + 'Interaction Index (Bell)\nOhio census tracts to counties', + subtitle = 'Black non-Hispanic vs. 
white non-Hispanic' + ) +``` + +#### Compute Correlation Ratio (*V*) + +Compute the aspatial racial/ethnic *V* values (2017-2021 5-year ACS) for South Carolina, U.S.A., counties from census tracts. This metric is based on [Bell (1954)](https://doi.org/10.2307/2574118) and adapted by [White (1986)](https://doi.org/10.2307/3644339). Multiple racial/ethnic subgroups are available in the `white()` function, including: + +| ACS table source | racial/ethnic subgroup | character for `subgroup` argument | +| -------------- | ------------- | ---------------- | +| B03002_002 | not Hispanic or Latino | NHoL | +| B03002_003 | not Hispanic or Latino, white alone | NHoLW | +| B03002_004 | not Hispanic or Latino, Black or African American alone | NHoLB | +| B03002_005 | not Hispanic or Latino, American Indian and Alaska Native alone | NHoLAIAN | +| B03002_006 | not Hispanic or Latino, Asian alone | NHoLA | +| B03002_007 | not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone | NHoLNHOPI | +| B03002_008 | not Hispanic or Latino, some other race alone | NHoLSOR | +| B03002_009 | not Hispanic or Latino, two or more races | NHoLTOMR | +| B03002_010 | not Hispanic or Latino, two races including some other race | NHoLTRiSOR | +| B03002_011 | not Hispanic or Latino, two races excluding some other race, and three or more races | NHoLTReSOR | +| B03002_012 | Hispanic or Latino | HoL | +| B03002_013 | Hispanic or Latino, white alone | HoLW | +| B03002_014 | Hispanic or Latino, Black or African American alone | HoLB | +| B03002_015 | Hispanic or Latino, American Indian and Alaska Native alone | HoLAIAN | +| B03002_016 | Hispanic or Latino, Asian alone | HoLA | +| B03002_017 | Hispanic or Latino, Native Hawaiian and other Pacific Islander alone | HoLNHOPI | +| B03002_018 | Hispanic or Latino, some other race alone | HoLSOR | +| B03002_019 | Hispanic or Latino, two or more races | HoLTOMR | +| B03002_020 | Hispanic or Latino, two races including some other race | HoLTRiSOR 
| +| B03002_021 | Hispanic or Latino, two races excluding some other race, and three or more races | HoLTReSOR | + +*V* removes the asymmetry from the Isolation Index by controlling for the effect of population composition when comparing smaller geographical areas to larger ones within which the smaller geographical areas are located. The Isolation Index is some measure of the probability that a member of one subgroup(s) will meet or interact with a member of another subgroup(s) with higher values signifying higher probability of interaction (less isolation). *V* can range in value from 0 to Inf. + +```{r white_prep, results = 'hide'} +white2021SC <- white( + geo_large = 'county', + geo_small = 'tract', + state = 'SC', + year = 2021, + subgroup = 'NHoLB' +) + +# Obtain the 2021 census counties from the 'tigris' package +county2021SC <- counties(state = 'SC', year = 2021, cb = TRUE) + +# Join the V values to the county geometry +SC2021white <- county2021SC %>% + left_join(white2021SC$v, by = 'GEOID') +``` + +```{r white_plot, fig.height = 6, fig.width = 7} +# Visualize the V values (2017-2021 5-year ACS) for South Carolina, U.S.A., counties +ggplot() + + geom_sf( + data = SC2021white, + aes(fill = V), + size = 0.05, + color = 'white' + ) + + geom_sf( + data = county2021SC, + fill = 'transparent', + color = 'white', + size = 0.2 + ) + + theme_minimal() + + scale_fill_gradient2( + low = '#998ec3', + mid = '#f7f7f7', + high = '#f1a340', + midpoint = 1 + ) + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2017-2021 estimates') + + ggtitle( + 'Correlation Ratio (White)\nSouth Carolina census tracts to counties', + subtitle = 'Black non-Hispanic' + ) +``` + +#### Compute Location Quotient (*LQ*) + +Compute the aspatial racial/ethnic *LQ* values (2017-2021 5-year ACS) for Tennessee, U.S.A., counties vs. the state. This metric is based on [Merton (1939)](https://doi.org/10.2307/2084686) and adapted by [Sudano et al. 
(2013)](https://doi.org/10.1016/j.healthplace.2012.09.015). Multiple racial/ethnic subgroups are available in the `sudano()` function, including: + +| ACS table source | racial/ethnic subgroup | character for `subgroup` argument | +| -------------- | ------------- | ---------------- | +| B03002_002 | not Hispanic or Latino | NHoL | +| B03002_003 | not Hispanic or Latino, white alone | NHoLW | +| B03002_004 | not Hispanic or Latino, Black or African American alone | NHoLB | +| B03002_005 | not Hispanic or Latino, American Indian and Alaska Native alone | NHoLAIAN | +| B03002_006 | not Hispanic or Latino, Asian alone | NHoLA | +| B03002_007 | not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone | NHoLNHOPI | +| B03002_008 | not Hispanic or Latino, some other race alone | NHoLSOR | +| B03002_009 | not Hispanic or Latino, two or more races | NHoLTOMR | +| B03002_010 | not Hispanic or Latino, two races including some other race | NHoLTRiSOR | +| B03002_011 | not Hispanic or Latino, two races excluding some other race, and three or more races | NHoLTReSOR | +| B03002_012 | Hispanic or Latino | HoL | +| B03002_013 | Hispanic or Latino, white alone | HoLW | +| B03002_014 | Hispanic or Latino, Black or African American alone | HoLB | +| B03002_015 | Hispanic or Latino, American Indian and Alaska Native alone | HoLAIAN | +| B03002_016 | Hispanic or Latino, Asian alone | HoLA | +| B03002_017 | Hispanic or Latino, Native Hawaiian and other Pacific Islander alone | HoLNHOPI | +| B03002_018 | Hispanic or Latino, some other race alone | HoLSOR | +| B03002_019 | Hispanic or Latino, two or more races | HoLTOMR | +| B03002_020 | Hispanic or Latino, two races including some other race | HoLTRiSOR | +| B03002_021 | Hispanic or Latino, two races excluding some other race, and three or more races | HoLTReSOR | + +*LQ* is some measure of relative racial homogeneity of each smaller geography within a larger geography. 
*LQ* can range in value from 0 to infinity because it is a ratio of two proportions in which the numerator is the proportion of subgroup population in a smaller geography and the denominator is the proportion of subgroup population in its larger geography. For example, a smaller geography with an *LQ* of 5 means that the proportion of the subgroup population living in the smaller geography is five times the proportion of the subgroup population in its larger geography. Unlike the previous metrics that aggregate to the larger geography, *LQ* computes values for each smaller geography relative to the larger geography. + +```{r sudano_prep, results = 'hide'} +sudano2021TN <- sudano( + geo_large = 'state', + geo_small = 'county', + state = 'TN', + year = 2021, + subgroup = 'NHoLB' +) + +# Obtain the 2021 census counties from the 'tigris' package +county2021TN <- counties(state = 'TN', year = 2021, cb = TRUE) + +# Join the LQ values to the county geometry +TN2021sudano <- county2021TN %>% + left_join(sudano2021TN$lq, by = 'GEOID') +``` + +```{r sudano_plot, fig.height = 3, fig.width = 7} +# Visualize the LQ values (2017-2021 5-year ACS) for Tennessee, U.S.A., counties +ggplot() + + geom_sf( + data = TN2021sudano, + aes(fill = LQ), + size = 0.05, + color = 'white' + ) + + geom_sf( + data = county2021TN, + fill = 'transparent', + color = 'white', + size = 0.2 + ) + + theme_minimal() + + scale_fill_viridis_c() + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2017-2021 estimates') + + ggtitle( + 'Location Quotient (Sudano)\nTennessee counties vs. state', + subtitle = 'Black non-Hispanic' + ) +``` + +#### Compute Local Exposure and Isolation (*LEx/Is*) + +Compute the aspatial racial/ethnic Local Exposure and Isolation metric (2017-2021 5-year ACS) for Mississippi, U.S.A., counties vs. the state. This metric is based on [Bemanian & Beyer (2017)](https://doi.org/10.1158/1055-9965.EPI-16-0926).
Multiple racial/ethnic subgroups are available in the `bemanian_beyer()` function, including: + +| ACS table source | racial/ethnic subgroup | character for `subgroup` or `subgroup_ixn` argument | +| -------------- | ------------- | ---------------- | +| B03002_002 | not Hispanic or Latino | NHoL | +| B03002_003 | not Hispanic or Latino, white alone | NHoLW | +| B03002_004 | not Hispanic or Latino, Black or African American alone | NHoLB | +| B03002_005 | not Hispanic or Latino, American Indian and Alaska Native alone | NHoLAIAN | +| B03002_006 | not Hispanic or Latino, Asian alone | NHoLA | +| B03002_007 | not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone | NHoLNHOPI | +| B03002_008 | not Hispanic or Latino, some other race alone | NHoLSOR | +| B03002_009 | not Hispanic or Latino, two or more races | NHoLTOMR | +| B03002_010 | not Hispanic or Latino, two races including some other race | NHoLTRiSOR | +| B03002_011 | not Hispanic or Latino, two races excluding some other race, and three or more races | NHoLTReSOR | +| B03002_012 | Hispanic or Latino | HoL | +| B03002_013 | Hispanic or Latino, white alone | HoLW | +| B03002_014 | Hispanic or Latino, Black or African American alone | HoLB | +| B03002_015 | Hispanic or Latino, American Indian and Alaska Native alone | HoLAIAN | +| B03002_016 | Hispanic or Latino, Asian alone | HoLA | +| B03002_017 | Hispanic or Latino, Native Hawaiian and other Pacific Islander alone | HoLNHOPI | +| B03002_018 | Hispanic or Latino, some other race alone | HoLSOR | +| B03002_019 | Hispanic or Latino, two or more races | HoLTOMR | +| B03002_020 | Hispanic or Latino, two races including some other race | HoLTRiSOR | +| B03002_021 | Hispanic or Latino, two races excluding some other race, and three or more races | HoLTReSOR | + +*LEx/Is* is a measure of the probability that two individuals living within a specific smaller geography (e.g., census tract) of either different (i.e., exposure) or the same (i.e., 
isolation) racial/ethnic subgroup(s) will interact, assuming that individuals within a smaller geography are randomly mixed. *LEx/Is* is standardized with a logit transformation and centered against an expected case that all races/ethnicities are evenly distributed across a larger geography. *LEx/Is* can range from negative infinity to infinity. If *LEx/Is* is zero then the estimated probability of the interaction between two people of the given subgroup(s) within a smaller geography is equal to the expected probability if the subgroup(s) were perfectly mixed in the larger geography. If *LEx/Is* is greater than zero then the interaction is more likely to occur within the smaller geography than in the larger geography, and if *LEx/Is* is less than zero then the interaction is less likely to occur within the smaller geography than in the larger geography. Note: the exponentiation of each *LEx/Is* metric results in the odds ratio of the specific exposure or isolation of interest in a smaller geography relative to the larger geography. Similar to *LQ* (Sudano), *LEx/Is* computes values for each smaller geography relative to the larger geography. 
+ +```{r bemanian_beyer_prep, results = 'hide'} +bemanian_beyer2021MS <- bemanian_beyer( + geo_large = 'state', + geo_small = 'county', + state = 'MS', + year = 2021, + subgroup = 'NHoLB', + subgroup_ixn = 'NHoLW' +) + +# Obtain the 2021 census counties from the 'tigris' package +county2021MS <- counties(state = 'MS', year = 2021, cb = TRUE) + +# Join the LEx/Is values to the county geometry +MS2021bemanian_beyer <- county2021MS %>% + left_join(bemanian_beyer2021MS$lexis, by = 'GEOID') +``` + +```{r bemanian_beyer_plot, fig.height = 7, fig.width = 6.5} +# Visualize the LEx/Is values (2017-2021 5-year ACS) for Mississippi, U.S.A., counties +ggplot() + + geom_sf( + data = MS2021bemanian_beyer, + aes(fill = LExIs), + size = 0.05, + color = 'white' + ) + + geom_sf( + data = county2021MS, + fill = 'transparent', + color = 'white', + size = 0.2 + ) + + theme_minimal() + + scale_fill_gradient2( + low = '#998ec3', + mid = '#f7f7f7', + high = '#f1a340' + ) + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2017-2021 estimates') + + ggtitle( + 'Local Exposure and Isolation (Bemanian & Beyer)\nMississippi counties vs. state', + subtitle = 'Black non-Hispanic vs. White non-Hispanic' + ) +``` +```{r bemanian_beyer_odds, fig.height = 7, fig.width = 6.5} +# Visualize the exponentiated LEx/Is values (2017-2021 5-year ACS) for +## Mississippi, U.S.A., counties +ggplot() + + geom_sf( + data = MS2021bemanian_beyer, + aes(fill = exp(LExIs)), + size = 0.05, + color = 'white' + ) + + geom_sf( + data = county2021MS, + fill = 'transparent', + color = 'white', + size = 0.2 + ) + + theme_minimal() + + scale_fill_viridis_c() + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2017-2021 estimates') + + ggtitle( + 'Odds ratio of Local Exposure and Isolation (Bemanian & Beyer)\n + Mississippi counties vs. state', + subtitle = 'Black non-Hispanic vs. 
White non-Hispanic' + ) +``` + +#### Compute Delta (*DEL*) + +Compute the aspatial racial/ethnic *DEL* values (2017-2021 5-year ACS) for Alabama, U.S.A., counties from census tracts. This metric is based on [Hoover (1941)](https://doi.org/10.1017/S0022050700052980) and Duncan et al. (1961; LC:60007089). Multiple racial/ethnic subgroups are available in the `hoover()` function, including: + +| ACS table source | racial/ethnic subgroup | character for `subgroup` argument | +| -------------- | ------------- | ---------------- | +| B03002_002 | not Hispanic or Latino | NHoL | +| B03002_003 | not Hispanic or Latino, white alone | NHoLW | +| B03002_004 | not Hispanic or Latino, Black or African American alone | NHoLB | +| B03002_005 | not Hispanic or Latino, American Indian and Alaska Native alone | NHoLAIAN | +| B03002_006 | not Hispanic or Latino, Asian alone | NHoLA | +| B03002_007 | not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone | NHoLNHOPI | +| B03002_008 | not Hispanic or Latino, some other race alone | NHoLSOR | +| B03002_009 | not Hispanic or Latino, two or more races | NHoLTOMR | +| B03002_010 | not Hispanic or Latino, two races including some other race | NHoLTRiSOR | +| B03002_011 | not Hispanic or Latino, two races excluding some other race, and three or more races | NHoLTReSOR | +| B03002_012 | Hispanic or Latino | HoL | +| B03002_013 | Hispanic or Latino, white alone | HoLW | +| B03002_014 | Hispanic or Latino, Black or African American alone | HoLB | +| B03002_015 | Hispanic or Latino, American Indian and Alaska Native alone | HoLAIAN | +| B03002_016 | Hispanic or Latino, Asian alone | HoLA | +| B03002_017 | Hispanic or Latino, Native Hawaiian and other Pacific Islander alone | HoLNHOPI | +| B03002_018 | Hispanic or Latino, some other race alone | HoLSOR | +| B03002_019 | Hispanic or Latino, two or more races | HoLTOMR | +| B03002_020 | Hispanic or Latino, two races including some other race | HoLTRiSOR | +| B03002_021 | Hispanic 
or Latino, two races excluding some other race, and three or more races | HoLTReSOR | + +*DEL* is a measure of the proportion of members of one subgroup(s) residing in geographic units with above average density of members of the subgroup(s). The index provides the proportion of a subgroup population that would have to move across geographic units to achieve a uniform density. *DEL* can range in value from 0 to 1. + +```{r hoover_prep, results = 'hide'} +hoover2021AL <- hoover( + geo_large = 'county', + geo_small = 'tract', + state = 'AL', + year = 2021, + subgroup = 'NHoLB' +) + +# Obtain the 2021 census counties from the 'tigris' package +county2021AL <- counties(state = 'AL', year = 2021, cb = TRUE) + +# Join the DEL values to the county geometry +AL2021hoover <- county2021AL %>% + left_join(hoover2021AL$del, by = 'GEOID') +``` + +```{r hoover_plot, fig.height = 7, fig.width = 6} +# Visualize the DEL values (2017-2021 5-year ACS) for Alabama, U.S.A., counties +ggplot() + + geom_sf( + data = AL2021hoover, + aes(fill = DEL), + size = 0.05, + color = 'white' + ) + + geom_sf( + data = county2021AL, + fill = 'transparent', + color = 'white', + size = 0.2 + ) + + theme_minimal() + + scale_fill_viridis_c(limits = c(0, 1)) + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2017-2021 estimates') + + ggtitle( + 'Delta (Hoover)\nAlabama census tracts to counties', + subtitle = 'Black non-Hispanic' + ) +``` + +#### Compute an index of spatial proximity (*SP*) + +Compute an index of spatial proximity (2010-2014 5-year ACS) for Atlanta, GA, metropolitan area from census tracts. This metric is based on [White (1986)](https://doi.org/10.2307/3644339) and Blau (1977; ISBN-13:978-0-029-03660-0) that designed the metric to identify racial or ethnic enclaves. 
Multiple racial/ethnic subgroups are available in the `white_blau()` function, including: + +| ACS table source | racial/ethnic subgroup | character for `subgroup` or `subgroup_ref` arguments | +| -------------- | ------------- | ---------------- | +| B03002_002 | not Hispanic or Latino | NHoL | +| B03002_003 | not Hispanic or Latino, white alone | NHoLW | +| B03002_004 | not Hispanic or Latino, Black or African American alone | NHoLB | +| B03002_005 | not Hispanic or Latino, American Indian and Alaska Native alone | NHoLAIAN | +| B03002_006 | not Hispanic or Latino, Asian alone | NHoLA | +| B03002_007 | not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone | NHoLNHOPI | +| B03002_008 | not Hispanic or Latino, some other race alone | NHoLSOR | +| B03002_009 | not Hispanic or Latino, two or more races | NHoLTOMR | +| B03002_010 | not Hispanic or Latino, two races including some other race | NHoLTRiSOR | +| B03002_011 | not Hispanic or Latino, two races excluding some other race, and three or more races | NHoLTReSOR | +| B03002_012 | Hispanic or Latino | HoL | +| B03002_013 | Hispanic or Latino, white alone | HoLW | +| B03002_014 | Hispanic or Latino, Black or African American alone | HoLB | +| B03002_015 | Hispanic or Latino, American Indian and Alaska Native alone | HoLAIAN | +| B03002_016 | Hispanic or Latino, Asian alone | HoLA | +| B03002_017 | Hispanic or Latino, Native Hawaiian and other Pacific Islander alone | HoLNHOPI | +| B03002_018 | Hispanic or Latino, some other race alone | HoLSOR | +| B03002_019 | Hispanic or Latino, two or more races | HoLTOMR | +| B03002_020 | Hispanic or Latino, two races including some other race | HoLTRiSOR | +| B03002_021 | Hispanic or Latino, two races excluding some other race, and three or more races | HoLTReSOR | + +*SP* is a measure of clustering of racial/ethnic populations within smaller geographical areas that are located within larger geographical areas. 
*SP* can range in value from 0 to Inf and represents the degree to which an area is a racial or ethnic enclave. A value of 1 indicates there is no differential clustering between subgroup and referent group members. A value greater than 1 indicates subgroup members live nearer to one another than to referent subgroup members. A value less than 1 indicates subgroup members live nearer to referent subgroup members than to their own subgroup members. + +```{r white_blau_prep, results = 'hide'} +whiteblau2014GA <- white_blau( + geo_large = 'csa', + geo_small = 'tract', + state = c('GA', 'AL', 'TN', 'FL'), + year = 2014, + subgroup = 'NHoLB', + subgroup_ref = 'NHoLW' +) + +# Obtain the 2014 Combined Statistical Areas from the 'tigris' package +csa2014 <- combined_statistical_areas(year = 2014, cb = TRUE) +# Obtain the 2014 state from the 'tigris' package +state2014 <- states(cb = TRUE) + +# Join the SP values to the CSA geometries and filter for Georgia +GA2010whiteblau <- csa2014 %>% + left_join(whiteblau2014GA$sp, by = 'GEOID') %>% + filter(!st_is_empty(.)) %>% + filter(!is.na(SP)) %>% + st_filter(state2014 %>% filter(STUSPS == 'GA')) %>% + st_make_valid() +``` + +```{r white_blau_plot, fig.height = 7, fig.width = 7} +# Visualize the SP values (2010-2014 5-year ACS) for Georgia, U.S.A., CSAs +ggplot() + + geom_sf( + data = GA2010whiteblau, + aes(fill = SP), + # size = 0.05, + # color = 'white' + ) + + geom_sf( + data = state2014 %>% filter(STUSPS == 'GA'), + fill = 'transparent', + color = 'black', + size = 0.2 + ) + + theme_minimal() + + scale_fill_gradient2( + low = '#998ec3', + mid = '#f7f7f7', + high = '#f1a340', + midpoint = 1 + ) + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2010-2014 estimates') + + ggtitle( + 'An index of spatial proximity (White)\nCensus tracts to Combined Statistical Areas in Georgia', + subtitle = 'Black non-Hispanic vs. 
white non-Hispanic' + ) +``` + +#### Compute racial/ethnic Isolation Index (_xPx\*_) + +Compute the aspatial racial/ethnic _xPx\*_ values (2015-2019 5-year ACS) for Delaware, U.S.A., census tracts from census block groups. This metric is based on [Bell (1954)](https://doi.org/10.2307/2574118) and adapted by Lieberson (1981; ISBN-13:978-1-032-53884-6). Multiple racial/ethnic subgroups are available in the `lieberson()` function, including: + +| ACS table source | racial/ethnic subgroup | character for `subgroup` argument | +| -------------- | ------------- | ---------------- | +| B03002_002 | not Hispanic or Latino | NHoL | +| B03002_003 | not Hispanic or Latino, white alone | NHoLW | +| B03002_004 | not Hispanic or Latino, Black or African American alone | NHoLB | +| B03002_005 | not Hispanic or Latino, American Indian and Alaska Native alone | NHoLAIAN | +| B03002_006 | not Hispanic or Latino, Asian alone | NHoLA | +| B03002_007 | not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone | NHoLNHOPI | +| B03002_008 | not Hispanic or Latino, some other race alone | NHoLSOR | +| B03002_009 | not Hispanic or Latino, two or more races | NHoLTOMR | +| B03002_010 | not Hispanic or Latino, two races including some other race | NHoLTRiSOR | +| B03002_011 | not Hispanic or Latino, two races excluding some other race, and three or more races | NHoLTReSOR | +| B03002_012 | Hispanic or Latino | HoL | +| B03002_013 | Hispanic or Latino, white alone | HoLW | +| B03002_014 | Hispanic or Latino, Black or African American alone | HoLB | +| B03002_015 | Hispanic or Latino, American Indian and Alaska Native alone | HoLAIAN | +| B03002_016 | Hispanic or Latino, Asian alone | HoLA | +| B03002_017 | Hispanic or Latino, Native Hawaiian and other Pacific Islander alone | HoLNHOPI | +| B03002_018 | Hispanic or Latino, some other race alone | HoLSOR | +| B03002_019 | Hispanic or Latino, two or more races | HoLTOMR | +| B03002_020 | Hispanic or Latino, two races including 
some other race | HoLTRiSOR | +| B03002_021 | Hispanic or Latino, two races excluding some other race, and three or more races | HoLTReSOR | + +_xPx\*_ is a measure of the probability that a member of one subgroup(s) will meet or interact with a member of another subgroup(s) with higher values signifying higher probability of interaction (less isolation) when comparing smaller geographical areas to larger ones within which the smaller geographical areas are located. _xPx\*_ can range in value from 0 to 1. + +```{r lieberson_prep, results = 'hide'} +lieberson2021DE <- lieberson( + geo_large = 'tract', + geo_small = 'block group', + state = 'DE', + year = 2019, + subgroup = 'NHoLB' +) + +# Obtain the 2019 census tracts from the 'tigris' package +tract2021DE <- tracts(state = 'DE', year = 2019, cb = TRUE) + +# Join the xPx* values to the tract geometry +DE2021lieberson <- tract2021DE %>% + left_join(lieberson2021DE$xpx_star, by = 'GEOID') +``` + +```{r lieberson_plot, fig.height = 9, fig.width = 7} +# Visualize the xPx* values (2015-2019 5-year ACS) for Delaware, U.S.A., census tracts +ggplot() + + geom_sf( + data = DE2021lieberson, + aes(fill = xPx_star), + size = 0.05, + color = 'white' + ) + + geom_sf( + data = tract2021DE, + fill = 'transparent', + color = 'white', + size = 0.2 + ) + + theme_minimal() + + scale_fill_viridis_c(limits = c(0, 1)) + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2015-2019 estimates') + + ggtitle( + 'Isolation Index (Lieberson)\nDelaware census block groups to census tracts', + subtitle = 'Black non-Hispanic' + ) ``` ```{r system} diff --git a/vignettes/vignette.html b/vignettes/vignette.html index f8613a6..96b983c 100644 --- a/vignettes/vignette.html +++ b/vignettes/vignette.html @@ -12,7 +12,7 @@ - + ndi: Neighborhood Deprivation Indices @@ -340,28 +340,28 @@

ndi: Neighborhood Deprivation Indices

Ian D. Buller (GitHub: @idblr)

-

2022-12-09

+

2024-08-22

Start with the necessary packages for the vignette.

-
loadedPackages <- c("dplyr", "ggplot2", "ndi", "tidycensus", "tigris")
-invisible(lapply(loadedPackages, library, character.only = TRUE))
-options(tigris_use_cache = TRUE)
+
loadedPackages <- c('dplyr', 'ggplot2', 'ndi', 'sf', 'tidycensus', 'tigris')
+invisible(lapply(loadedPackages, library, character.only = TRUE))
+options(tigris_use_cache = TRUE)

Set your U.S. Census Bureau access key. Follow this link to obtain one. Specify your access key in the messer() or powell_wiley() functions using the key -argument of the get_acs() function from the -tidycensus package called within each or by using the -census_api_key() function from the tidycensus +argument of the get_acs() function from the tidycensus +package called within each or by using the census_api_key() +function from the tidycensus package before running the messer() or powell_wiley() functions (see an example of the latter below).

-
tidycensus::census_api_key("...") # INSERT YOUR OWN KEY FROM U.S. CENSUS API
+
census_api_key('...') # INSERT YOUR OWN KEY FROM U.S. CENSUS API
-

Compute NDI (Messer)

-

Compute the NDI (Messer) values (2006-2010 5-year ACS) for Georgia, -U.S.A., census tracts. This metric is based on Messer et +

Compute NDI (Messer)

+

Compute the NDI (Messer) values (2006-2010 5-year ACS) for +Georgia, U.S.A., census tracts. This metric is based on Messer et al. (2006) with the following socio-economic status (SES) variables:

@@ -430,32 +430,32 @@

Compute NDI (Messer)

-
messer2010GA <- ndi::messer(state = "GA", year = 2010, round_output = TRUE)
+
messer2010GA <- messer(state = 'GA', year = 2010, round_output = TRUE)

One output from the messer() function is a tibble -containing the identification, geographic name, NDI (Messer) values, and -raw census characteristics for each tract.

-
messer2010GA$ndi
+containing the identification, geographic name, NDI (Messer) +values, and raw census characteristics for each tract.

+
messer2010GA$ndi
## # A tibble: 1,969 × 14
-##    GEOID  state county tract     NDI NDIQu…¹   OCC   CWD   POV   FHH   PUB   U30
-##    <chr>  <chr> <chr>  <chr>   <dbl> <fct>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
-##  1 13001… Geor… Appli… 9501  -0.0075 2-Belo…     0   0     0.1   0.1   0.1   0.3
-##  2 13001… Geor… Appli… 9502   0.0458 4-Most…     0   0     0.3   0.1   0.2   0.5
-##  3 13001… Geor… Appli… 9503   0.0269 3-Abov…     0   0     0.2   0     0.2   0.4
-##  4 13001… Geor… Appli… 9504  -0.0083 2-Belo…     0   0     0.1   0     0.1   0.3
-##  5 13001… Geor… Appli… 9505   0.0231 3-Abov…     0   0     0.2   0     0.2   0.4
-##  6 13003… Geor… Atkin… 9601   0.0619 4-Most…     0   0.1   0.2   0.2   0.2   0.5
-##  7 13003… Geor… Atkin… 9602   0.0593 4-Most…     0   0.1   0.3   0.1   0.2   0.4
-##  8 13003… Geor… Atkin… 9603   0.0252 3-Abov…     0   0     0.3   0.1   0.2   0.4
-##  9 13005… Geor… Bacon… 9701   0.0061 3-Abov…     0   0     0.2   0     0.2   0.4
-## 10 13005… Geor… Bacon… 9702…  0.0121 3-Abov…     0   0     0.2   0.1   0.1   0.5
-## # … with 1,959 more rows, 2 more variables: EDU <dbl>, EMP <dbl>, and
-## #   abbreviated variable name ¹​NDIQuart
+## GEOID state county tract NDI NDIQuart OCC CWD POV FHH PUB U30 +## <chr> <chr> <chr> <chr> <dbl> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> +## 1 1300… Geor… Appli… 9501 -0.0075 2-Below… 0 0 0.1 0.1 0.1 0.3 +## 2 1300… Geor… Appli… 9502 0.0458 4-Most … 0 0 0.3 0.1 0.2 0.5 +## 3 1300… Geor… Appli… 9503 0.0269 3-Above… 0 0 0.2 0 0.2 0.4 +## 4 1300… Geor… Appli… 9504 -0.0083 2-Below… 0 0 0.1 0 0.1 0.3 +## 5 1300… Geor… Appli… 9505 0.0231 3-Above… 0 0 0.2 0 0.2 0.4 +## 6 1300… Geor… Atkin… 9601 0.0619 4-Most … 0 0.1 0.2 0.2 0.2 0.5 +## 7 1300… Geor… Atkin… 9602 0.0593 4-Most … 0 0.1 0.3 0.1 0.2 0.4 +## 8 1300… Geor… Atkin… 9603 0.0252 3-Above… 0 0 0.3 0.1 0.2 0.4 +## 9 1300… Geor… Bacon… 9701 0.0061 3-Above… 0 0 0.2 0 0.2 0.4 +## 10 1300… Geor… Bacon… 9702… 0.0121 3-Above… 0 0 0.2 0.1 0.1 0.5 +## # ℹ 1,959 more rows +## # ℹ 2 more variables: EDU <dbl>, EMP <dbl>

A second output from the messer() function is the -results from the principal component analysis used to compute the NDI -(Messer) values.

-
messer2010GA$pca
+results from the principal component analysis used to compute the +NDI (Messer) values.

+
messer2010GA$pca
## Principal Components Analysis
-## Call: psych::principal(r = ndi_vars_pca, nfactors = 1, n.obs = nrow(ndi_vars_pca), 
+## Call: psych::principal(r = ndi_data_pca, nfactors = 1, n.obs = nrow(ndi_data_pca), 
 ##     covar = FALSE, scores = TRUE, missing = imp)
 ## Standardized loadings (pattern matrix) based upon correlation matrix
 ##       PC1   h2   u2 com
@@ -481,8 +481,8 @@ 

Compute NDI (Messer)

## Fit based upon off diagonal values = 0.95

A third output from the messer() function is a tibble containing a breakdown of the missingness of the census characteristics -used to compute the NDI (Messer) values.

-
messer2010GA$missing
+used to compute the NDI (Messer) values.

+
messer2010GA$missing
## # A tibble: 8 × 4
 ##   variable total n_missing percent_missing
 ##   <chr>    <int>     <int> <chr>          
@@ -494,108 +494,137 @@ 

Compute NDI (Messer)

## 6 POV 1969 14 0.71 % ## 7 PUB 1969 14 0.71 % ## 8 U30 1969 14 0.71 %
-

We can visualize the NDI (Messer) values geographically by linking -them to spatial information from the tigris package and -plotting with the ggplot2 package suite.

-
# Obtain the 2010 counties from the "tigris" package
-county2010GA <- tigris::counties(state = "GA", year = 2010, cb = TRUE)
-# Remove first 9 characters from GEOID for compatibility with tigris information
-county2010GA$GEOID <- substring(county2010GA$GEO_ID, 10) 
-
-# Obtain the 2010 census tracts from the "tigris" package
-tract2010GA <- tigris::tracts(state = "GA", year = 2010, cb = TRUE)
-# Remove first 9 characters from GEOID for compatibility with tigris information
-tract2010GA$GEOID <- substring(tract2010GA$GEO_ID, 10) 
-
-# Join the NDI (Messer) values to the census tract geometry
-GA2010messer <- dplyr::left_join(tract2010GA, messer2010GA$ndi, by = "GEOID")
-
# Visualize the NDI (Messer) values (2006-2010 5-year ACS) for Georgia, U.S.A., census tracts 
-## Continuous Index
-ggplot2::ggplot() + 
-  ggplot2::geom_sf(data = GA2010messer, 
-                   ggplot2::aes(fill = NDI),
-                   size = 0.05,
-                   color = "transparent") +
-   ggplot2::geom_sf(data = county2010GA,
-                   fill = "transparent", 
-                   color = "white",
-                   size = 0.2) +
-  ggplot2::theme_minimal() +
-  ggplot2::scale_fill_viridis_c() +
-  ggplot2::labs(fill = "Index (Continuous)",
-                caption = "Source: U.S. Census ACS 2006-2010 estimates") +
-  ggplot2::ggtitle("Neighborhood Deprivation Index (Messer)",
-                   subtitle = "GA census tracts as the referent")
-
-## Categorical Index
-### Rename "9-NDI not avail" level as NA for plotting
-GA2010messer$NDIQuartNA <- factor(replace(as.character(GA2010messer$NDIQuart), 
-                                            GA2010messer$NDIQuart == "9-NDI not avail", NA),
-                                  c(levels(GA2010messer$NDIQuart)[-5], NA))
-
-ggplot2::ggplot() + 
-  ggplot2::geom_sf(data = GA2010messer, 
-                   ggplot2::aes(fill = NDIQuartNA),
-                   size = 0.05,
-                   color = "transparent") +
-   ggplot2::geom_sf(data = county2010GA,
-                   fill = "transparent", 
-                   color = "white",
-                   size = 0.2) +
-  ggplot2::theme_minimal() + 
-  ggplot2::scale_fill_viridis_d(guide = ggplot2::guide_legend(reverse = TRUE),
-                                na.value = "grey80") +
-  ggplot2::labs(fill = "Index (Categorical)",
-                caption = "Source: U.S. Census ACS 2006-2010 estimates") +
-  ggplot2::ggtitle("Neighborhood Deprivation Index (Messer) Quartiles",
-                   subtitle = "GA census tracts as the referent")
-

-

The results above are at the tract level. The NDI (Messer) values can -also be calculated at the county level.

-
messer2010GA_county <- ndi::messer(geo = "county", state = "GA", year = 2010)
-
-# Join the NDI (Messer) values to the county geometry
-GA2010messer_county <- dplyr::left_join(county2010GA, messer2010GA_county$ndi, by = "GEOID")
-
# Visualize the NDI (Messer) values (2006-2010 5-year ACS) for Georgia, U.S.A., counties
-## Continuous Index
-ggplot2::ggplot() + 
-  ggplot2::geom_sf(data = GA2010messer_county, 
-                   ggplot2::aes(fill = NDI),
-                   size = 0.20,
-                   color = "white") +
-  ggplot2::theme_minimal() + 
-  ggplot2::scale_fill_viridis_c() +
-  ggplot2::labs(fill = "Index (Continuous)",
-                caption = "Source: U.S. Census ACS 2006-2010 estimates") +
-  ggplot2::ggtitle("Neighborhood Deprivation Index (Messer)",
-                   subtitle = "GA counties as the referent")
-
-## Categorical Index
-
-### Rename "9-NDI not avail" level as NA for plotting
-GA2010messer_county$NDIQuartNA <- factor(replace(as.character(GA2010messer_county$NDIQuart), 
-                                            GA2010messer_county$NDIQuart == "9-NDI not avail", NA),
-                                         c(levels(GA2010messer_county$NDIQuart)[-5], NA))
-
-ggplot2::ggplot() + 
-  ggplot2::geom_sf(data = GA2010messer_county, 
-                   ggplot2::aes(fill = NDIQuartNA),
-                   size = 0.20,
-                   color = "white") +
-  ggplot2::theme_minimal() + 
-  ggplot2::scale_fill_viridis_d(guide = ggplot2::guide_legend(reverse = TRUE),
-                                na.value = "grey80") +
-  ggplot2::labs(fill = "Index (Categorical)",
-                caption = "Source: U.S. Census ACS 2006-2010 estimates") +
-  ggplot2::ggtitle("Neighborhood Deprivation Index (Messer) Quartiles",
-                   subtitle = "GA counties as the referent")
-

+

We can visualize the NDI (Messer) values geographically by +linking them to spatial information from the tigris +package and plotting with the ggplot2 +package suite.

+
# Obtain the 2010 counties from the 'tigris' package
+county2010GA <- counties(state = 'GA', year = 2010, cb = TRUE)
+# Remove first 9 characters from GEOID for compatibility with tigris information
+county2010GA$GEOID <- substring(county2010GA$GEO_ID, 10) 
+
+# Obtain the 2010 census tracts from the 'tigris' package
+tract2010GA <- tracts(state = 'GA', year = 2010, cb = TRUE)
+# Remove first 9 characters from GEOID for compatibility with tigris information
+tract2010GA$GEOID <- substring(tract2010GA$GEO_ID, 10) 
+
+# Join the NDI (Messer) values to the census tract geometry
+GA2010messer <- tract2010GA %>%
+  left_join(messer2010GA$ndi, by = 'GEOID')
+
# Visualize the NDI (Messer) values (2006-2010 5-year ACS) for Georgia, U.S.A., census tracts 
+## Continuous Index
+ggplot() +
+  geom_sf(
+    data = GA2010messer,
+    aes(fill = NDI),
+    size = 0.05,
+    color = 'transparent'
+  ) +
+  geom_sf(
+    data = county2010GA,
+    fill = 'transparent',
+    color = 'white',
+    size = 0.2
+  ) +
+  theme_minimal() +
+  scale_fill_viridis_c() +
+  labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2006-2010 estimates') +
+  ggtitle(
+    'Neighborhood Deprivation Index (Messer)',
+    subtitle = 'GA census tracts as the referent'
+  )
+
+## Categorical Index
+### Rename '9-NDI not avail' level as NA for plotting
+GA2010messer$NDIQuartNA <-
+  factor(
+    replace(
+      as.character(GA2010messer$NDIQuart),
+      GA2010messer$NDIQuart == '9-NDI not avail',
+      NA
+    ),
+    c(levels(GA2010messer$NDIQuart)[-5], NA)
+  )
+
+ggplot() +
+  geom_sf(
+    data = GA2010messer,
+    aes(fill = NDIQuartNA),
+    size = 0.05,
+    color = 'transparent'
+  ) +
+  geom_sf(
+    data = county2010GA,
+    fill = 'transparent',
+    color = 'white',
+    size = 0.2
+  ) +
+  theme_minimal() +
+  scale_fill_viridis_d(guide = guide_legend(reverse = TRUE), na.value = 'grey80') +
+  labs(fill = 'Index (Categorical)', caption = 'Source: U.S. Census ACS 2006-2010 estimates') +
+  ggtitle(
+    'Neighborhood Deprivation Index (Messer) Quartiles',
+    subtitle = 'GA census tracts as the referent'
+  )
+

+

The results above are at the tract level. The NDI (Messer) +values can also be calculated at the county level.

+
messer2010GA_county <- messer(geo = 'county', state = 'GA', year = 2010)
+
+# Join the NDI (Messer) values to the county geometry
+GA2010messer_county <- county2010GA %>%
+  left_join(messer2010GA_county$ndi, by = 'GEOID')
+
# Visualize the NDI (Messer) values (2006-2010 5-year ACS) for Georgia, U.S.A., counties
+## Continuous Index
+ggplot() +
+  geom_sf(
+    data = GA2010messer_county,
+    aes(fill = NDI),
+    size = 0.20,
+    color = 'white'
+  ) +
+  theme_minimal() +
+  scale_fill_viridis_c() +
+  labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2006-2010 estimates') +
+  ggtitle(
+    'Neighborhood Deprivation Index (Messer)',
+    subtitle = 'GA counties as the referent'
+  )
+
+## Categorical Index
+
+### Rename '9-NDI not avail' level as NA for plotting
+GA2010messer_county$NDIQuartNA <-
+  factor(
+    replace(
+      as.character(GA2010messer_county$NDIQuart),
+      GA2010messer_county$NDIQuart == '9-NDI not avail',
+      NA
+    ),
+    c(levels(GA2010messer_county$NDIQuart)[-5], NA)
+  )
+
+ggplot() +
+  geom_sf(
+    data = GA2010messer_county,
+    aes(fill = NDIQuartNA),
+    size = 0.20,
+    color = 'white'
+  ) +
+  theme_minimal() +
+  scale_fill_viridis_d(guide = guide_legend(reverse = TRUE), na.value = 'grey80') +
+  labs(fill = 'Index (Categorical)', caption = 'Source: U.S. Census ACS 2006-2010 estimates') +
+  ggtitle(
+    'Neighborhood Deprivation Index (Messer) Quartiles',
+    subtitle = 'GA counties as the referent'
+  )
+

-

Compute NDI (Powell-Wiley)

-

Compute the NDI (Powell-Wiley) values (2016-2020 5-year ACS) for -Maryland, Virginia, Washington, D.C., and West Virginia, U.S.A., census -tracts. This metric is based on Andrews et +

Compute NDI (Powell-Wiley)

+

Compute the NDI (Powell-Wiley) values (2016-2020 5-year ACS) +for Maryland, Virginia, Washington, D.C., and West Virginia, U.S.A., +census tracts. This metric is based on Andrews et al. (2020) and Slotman et al. (2022) with socio-economic status (SES) variables chosen by Roux and Mair (2010):

@@ -701,35 +730,40 @@

Compute NDI (Powell-Wiley)

More information about the codebook and computation -of the NDI (Powell-Wiley) can be found on a GIS Portal for +of the NDI (Powell-Wiley) can be found on a GIS Portal for Cancer Research website.

-
powell_wiley2020DMVW <- ndi::powell_wiley(state = c("DC", "MD", "VA", "WV"), year = 2020, round_output = TRUE)
+
powell_wiley2020DMVW <- powell_wiley(
+  state = c('DC', 'MD', 'VA', 'WV'),
+  year = 2020,
+  round_output = TRUE
+)

One output from the powell_wiley() function is a tibble -containing the identification, geographic name, NDI (Powell-Wiley) -values, and raw census characteristics for each tract.

-
powell_wiley2020DMVW$ndi
+containing the identification, geographic name, NDI +(Powell-Wiley) values, and raw census characteristics for each +tract.

+
powell_wiley2020DMVW$ndi
## # A tibble: 4,425 × 20
-##    GEOID       state  county tract   NDI NDIQu…¹ MedHH…² PctRe…³ PctPu…⁴ MedHo…⁵
-##    <chr>       <chr>  <chr>  <chr> <dbl> <fct>     <dbl>   <dbl>   <dbl>   <dbl>
-##  1 11001000101 Distr… Distr… 1.01  -2.13 1-Leas…  187839    50.9     0.8  699100
-##  2 11001000102 Distr… Distr… 1.02  -2.46 1-Leas…  184167    52.2     0.6 1556000
-##  3 11001000201 Distr… Distr… 2.01  NA    9-NDI …      NA   NaN     NaN        NA
-##  4 11001000202 Distr… Distr… 2.02  -2.30 1-Leas…  164261    49.6     0.9 1309100
-##  5 11001000300 Distr… Distr… 3     -2.06 1-Leas…  156483    46       0.6  976500
-##  6 11001000400 Distr… Distr… 4     -2.09 1-Leas…  153397    47.8     0   1164200
-##  7 11001000501 Distr… Distr… 5.01  -2.11 1-Leas…  119911    44.5     0.8  674600
-##  8 11001000502 Distr… Distr… 5.02  -2.21 1-Leas…  153264    46.8     0.5 1012500
-##  9 11001000600 Distr… Distr… 6     -2.16 1-Leas…  154266    60.8     7.4 1109800
-## 10 11001000702 Distr… Distr… 7.02  -1.20 1-Leas…   71747    22.9     0    289900
-## # … with 4,415 more rows, 10 more variables: PctMgmtBusScArti <dbl>,
+##    GEOID       state  county tract   NDI NDIQuint MedHHInc PctRecvIDR PctPubAsst
+##    <chr>       <chr>  <chr>  <chr> <dbl> <fct>       <dbl>      <dbl>      <dbl>
+##  1 11001000101 Distr… Distr… 1.01  -2.13 1-Least…   187839       50.9        0.8
+##  2 11001000102 Distr… Distr… 1.02  -2.46 1-Least…   184167       52.2        0.6
+##  3 11001000201 Distr… Distr… 2.01  NA    9-NDI n…       NA      NaN        NaN  
+##  4 11001000202 Distr… Distr… 2.02  -2.30 1-Least…   164261       49.6        0.9
+##  5 11001000300 Distr… Distr… 3     -2.06 1-Least…   156483       46          0.6
+##  6 11001000400 Distr… Distr… 4     -2.09 1-Least…   153397       47.8        0  
+##  7 11001000501 Distr… Distr… 5.01  -2.11 1-Least…   119911       44.5        0.8
+##  8 11001000502 Distr… Distr… 5.02  -2.21 1-Least…   153264       46.8        0.5
+##  9 11001000600 Distr… Distr… 6     -2.16 1-Least…   154266       60.8        7.4
+## 10 11001000702 Distr… Distr… 7.02  -1.20 1-Least…    71747       22.9        0  
+## # ℹ 4,415 more rows
+## # ℹ 11 more variables: MedHomeVal <dbl>, PctMgmtBusScArti <dbl>,
 ## #   PctFemHeadKids <dbl>, PctOwnerOcc <dbl>, PctNoPhone <dbl>,
 ## #   PctNComPlmb <dbl>, PctEducHSPlus <dbl>, PctEducBchPlus <dbl>,
-## #   PctFamBelowPov <dbl>, PctUnempl <dbl>, TotalPop <dbl>, and abbreviated
-## #   variable names ¹​NDIQuint, ²​MedHHInc, ³​PctRecvIDR, ⁴​PctPubAsst, ⁵​MedHomeVal
+## # PctFamBelowPov <dbl>, PctUnempl <dbl>, TotalPop <dbl>

A second output from the powell_wiley() function is the -results from the principal component analysis used to compute the NDI -(Powell-Wiley) values.

-
powell_wiley2020DMVW$pca
+results from the principal component analysis used to compute the +NDI (Powell-Wiley) values.

+
powell_wiley2020DMVW$pca
## $loadings
 ## 
 ## Loadings:
@@ -805,130 +839,153 @@ 

Compute NDI (Powell-Wiley)

## Cumulative Proportion 0.5157997 0.8761900 1.00000000

A third output from the powell_wiley() function is a tibble containing a breakdown of the missingness of the census -characteristics used to compute the NDI (Powell-Wiley) values.

-
powell_wiley2020DMVW$missing
+characteristics used to compute the NDI (Powell-Wiley) +values.

+
powell_wiley2020DMVW$missing
## # A tibble: 13 × 4
 ##    variable        total n_missing percent_missing
 ##    <chr>           <int>     <int> <chr>          
-##  1 logMedHHInc      4425        73 1.65 %         
-##  2 logMedHomeVal    4425       148 3.34 %         
-##  3 PctEducLTBchZ    4425        47 1.06 %         
-##  4 PctEducLTHSZ     4425        47 1.06 %         
-##  5 PctFamBelowPovZ  4425        63 1.42 %         
-##  6 PctFemHeadKidsZ  4425        60 1.36 %         
-##  7 PctNComPlmbZ     4425        60 1.36 %         
-##  8 PctNoIDRZ        4425        60 1.36 %         
-##  9 PctNoPhoneZ      4425        60 1.36 %         
-## 10 PctNotOwnerOccZ  4425        60 1.36 %         
-## 11 PctPubAsstZ      4425        60 1.36 %         
-## 12 PctUnemplZ       4425        57 1.29 %         
-## 13 PctWorkClassZ    4425        57 1.29 %
+## 1 PctEducLTBchZ 4425 47 1.06 % +## 2 PctEducLTHSZ 4425 47 1.06 % +## 3 PctFamBelowPovZ 4425 63 1.42 % +## 4 PctFemHeadKidsZ 4425 60 1.36 % +## 5 PctNComPlmbZ 4425 60 1.36 % +## 6 PctNoIDRZ 4425 60 1.36 % +## 7 PctNoPhoneZ 4425 60 1.36 % +## 8 PctNotOwnerOccZ 4425 60 1.36 % +## 9 PctPubAsstZ 4425 60 1.36 % +## 10 PctUnemplZ 4425 57 1.29 % +## 11 PctWorkClassZ 4425 57 1.29 % +## 12 logMedHHInc 4425 73 1.65 % +## 13 logMedHomeVal 4425 148 3.34 %

A fourth output from the powell_wiley() function is a character string or numeric value of a standardized Cronbach’s alpha. A value greater than 0.7 is desired.

-
powell_wiley2020DMVW$cronbach
-
## [1] 0.931138
-

We can visualize the NDI (Powell-Wiley) values geographically by -linking them to spatial information from the tigris package -and plotting with the ggplot2 package suite.

-
# Obtain the 2020 counties from the "tigris" package
-county2020 <- tigris::counties(cb = TRUE)
-county2020DMVW <- county2020[county2020$STUSPS %in% c("DC", "MD", "VA", "WV"), ]
-
-# Obtain the 2020 census tracts from the "tigris" package
-tract2020D <- tigris::tracts(state = "DC", year = 2020, cb = TRUE)
-tract2020M <- tigris::tracts(state = "MD", year = 2020, cb = TRUE)
-tract2020V <- tigris::tracts(state = "VA", year = 2020, cb = TRUE)
-tract2020W <- tigris::tracts(state = "WV", year = 2020, cb = TRUE)
-tracts2020DMVW <- rbind(tract2020D, tract2020M, tract2020V, tract2020W)
-
-# Join the NDI (Powell-Wiley) values to the census tract geometry
-DMVW2020pw <- dplyr::left_join(tracts2020DMVW, powell_wiley2020DMVW$ndi, by = "GEOID")
-
# Visualize the NDI (Powell-Wiley) values (2016-2020 5-year ACS) 
-## Maryland, Virginia, Washington, D.C., and West Virginia, U.S.A., census tracts 
-## Continuous Index
-ggplot2::ggplot() + 
-  ggplot2::geom_sf(data = DMVW2020pw, 
-                   ggplot2::aes(fill = NDI), 
-                   color = NA) +
-  ggplot2::geom_sf(data = county2020DMVW,
-                   fill = "transparent", 
-                   color = "white") +
-  ggplot2::theme_minimal() + 
-  ggplot2::scale_fill_viridis_c(na.value = "grey80") +
-  ggplot2::labs(fill = "Index (Continuous)",
-                caption = "Source: U.S. Census ACS 2016-2020 estimates")+
-  ggplot2::ggtitle("Neighborhood Deprivation Index (Powell-Wiley)",
-                   subtitle = "DC, MD, VA, and WV tracts as the referent")
-
-## Categorical Index (Population-weighted quintiles)
-### Rename "9-NDI not avail" level as NA for plotting
-DMVW2020pw$NDIQuintNA <- factor(replace(as.character(DMVW2020pw$NDIQuint), 
-                                        DMVW2020pw$NDIQuint == "9-NDI not avail", NA),
-                                c(levels(DMVW2020pw$NDIQuint)[-6], NA))
-
-ggplot2::ggplot() + 
-  ggplot2::geom_sf(data = DMVW2020pw, 
-                   ggplot2::aes(fill = NDIQuintNA), 
-                   color = NA) +
-  ggplot2::geom_sf(data = county2020DMVW,
-                   fill = "transparent", 
-                   color = "white") +
-  ggplot2::theme_minimal() + 
-  ggplot2::scale_fill_viridis_d(guide = ggplot2::guide_legend(reverse = TRUE),
-                                na.value = "grey80") +
-  ggplot2::labs(fill = "Index (Categorical)",
-                caption = "Source: U.S. Census ACS 2016-2020 estimates")+
-  ggplot2::ggtitle("Neighborhood Deprivation Index (Powell-Wiley) Population-weighted Quintiles",
-                   subtitle = "DC, MD, VA, and WV tracts as the referent")
-

-

Like the NDI (Messer), we also compute county-level NDI -(Powell-Wiley).

-
# Obtain the 2020 counties from the "tigris" package
-county2020DMVW <- tigris::counties(state = c("DC", "MD", "VA", "WV"), year = 2020, cb = TRUE)
-
-# NDI (Powell-Wiley) at the county level (2016-2020)
-powell_wiley2020DMVW_county <- ndi::powell_wiley(geo = "county",
-                                                 state = c("DC", "MD", "VA", "WV"),
-                                                 year = 2020)
-
-# Join the NDI (Powell-Wiley) values to the county geometry
-DMVW2020pw_county <- dplyr::left_join(county2020DMVW, powell_wiley2020DMVW_county$ndi, by = "GEOID")
-
# Visualize the NDI (Powell-Wiley) values (2016-2020 5-year ACS)
-## Maryland, Virginia, Washington, D.C., and West Virginia, U.S.A., counties
-## Continuous Index
-ggplot2::ggplot() + 
-  ggplot2::geom_sf(data = DMVW2020pw_county, 
-                   ggplot2::aes(fill = NDI),
-                   size = 0.20,
-                   color = "white") +
-  ggplot2::theme_minimal() + 
-  ggplot2::scale_fill_viridis_c() +
-  ggplot2::labs(fill = "Index (Continuous)",
-                caption = "Source: U.S. Census ACS 2016-2020 estimates") +
-  ggplot2::ggtitle("Neighborhood Deprivation Index (Powell-Wiley)",
-                   subtitle = "DC, MD, VA, and WV counties as the referent")
-
-## Categorical Index
-
-### Rename "9-NDI not avail" level as NA for plotting
-DMVW2020pw_county$NDIQuintNA <- factor(replace(as.character(DMVW2020pw_county$NDIQuint), 
-                                            DMVW2020pw_county$NDIQuint == "9-NDI not avail", NA),
-                                         c(levels(DMVW2020pw_county$NDIQuint)[-6], NA))
-
-ggplot2::ggplot() + 
-  ggplot2::geom_sf(data = DMVW2020pw_county, 
-                   ggplot2::aes(fill = NDIQuint),
-                   size = 0.20,
-                   color = "white") +
-  ggplot2::theme_minimal() + 
-  ggplot2::scale_fill_viridis_d(guide = ggplot2::guide_legend(reverse = TRUE),
-                                na.value = "grey80") +
-  ggplot2::labs(fill = "Index (Categorical)",
-                caption = "Source: U.S. Census ACS 2016-2020 estimates") +
-  ggplot2::ggtitle("Neighborhood Deprivation Index (Powell-Wiley) Population-weighted Quintiles",
-                   subtitle = "DC, MD, VA, and WV counties as the referent")
-

+
powell_wiley2020DMVW$cronbach
+
## [1] 0.9321693
+

We can visualize the NDI (Powell-Wiley) values +geographically by linking them to spatial information from the +tigris +package and plotting with the ggplot2 +package suite.

+
# Obtain the 2020 counties from the 'tigris' package
+county2020 <- counties(cb = TRUE)
+county2020DMVW <- county2020[county2020$STUSPS %in% c('DC', 'MD', 'VA', 'WV'), ]
+
+# Obtain the 2020 census tracts from the 'tigris' package
+tract2020D <- tracts(state = 'DC', year = 2020, cb = TRUE)
+tract2020M <- tracts(state = 'MD', year = 2020, cb = TRUE)
+tract2020V <- tracts(state = 'VA', year = 2020, cb = TRUE)
+tract2020W <- tracts(state = 'WV', year = 2020, cb = TRUE)
+tracts2020DMVW <- rbind(tract2020D, tract2020M, tract2020V, tract2020W)
+
+# Join the NDI (Powell-Wiley) values to the census tract geometry
+DMVW2020pw <- tracts2020DMVW %>%
+  left_join(powell_wiley2020DMVW$ndi, by = 'GEOID')
+
# Visualize the NDI (Powell-Wiley) values (2016-2020 5-year ACS) 
+## Maryland, Virginia, Washington, D.C., and West Virginia, U.S.A., census tracts 
+## Continuous Index
+ggplot() +
+  geom_sf(
+    data = DMVW2020pw,
+    aes(fill = NDI),
+    color = NA
+  ) +
+  geom_sf(
+    data = county2020DMVW,
+    fill = 'transparent',
+    color = 'white'
+  ) +
+  theme_minimal() +
+  scale_fill_viridis_c(na.value = 'grey80') +
+  labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2016-2020 estimates') +
+  ggtitle(
+    'Neighborhood Deprivation Index (Powell-Wiley)',
+    subtitle = 'DC, MD, VA, and WV tracts as the referent'
+  )
+
+## Categorical Index (Population-weighted quintiles)
+### Rename '9-NDI not avail' level as NA for plotting
+DMVW2020pw$NDIQuintNA <-
+  factor(replace(
+    as.character(DMVW2020pw$NDIQuint),
+    DMVW2020pw$NDIQuint == '9-NDI not avail',
+    NA
+  ),
+  c(levels(DMVW2020pw$NDIQuint)[-6], NA))
+
+ggplot() +
+  geom_sf(data = DMVW2020pw, aes(fill = NDIQuintNA), color = NA) +
+  geom_sf(data = county2020DMVW, fill = 'transparent', color = 'white') +
+  theme_minimal() +
+  scale_fill_viridis_d(guide = guide_legend(reverse = TRUE), na.value = 'grey80') +
+  labs(fill = 'Index (Categorical)', caption = 'Source: U.S. Census ACS 2016-2020 estimates') +
+  ggtitle(
+    'Neighborhood Deprivation Index (Powell-Wiley) Population-weighted Quintiles',
+    subtitle = 'DC, MD, VA, and WV tracts as the referent'
+  )
+

+

Like the NDI (Messer), we also compute county-level +NDI (Powell-Wiley).

+
# Obtain the 2020 counties from the 'tigris' package
+county2020DMVW <- counties(state = c('DC', 'MD', 'VA', 'WV'), year = 2020, cb = TRUE)
+
+# NDI (Powell-Wiley) at the county level (2016-2020)
+powell_wiley2020DMVW_county <- powell_wiley(
+  geo = 'county',
+  state = c('DC', 'MD', 'VA', 'WV'),
+  year = 2020
+)
+
+# Join the NDI (Powell-Wiley) values to the county geometry
+DMVW2020pw_county <- county2020DMVW %>%
+  left_join(powell_wiley2020DMVW_county$ndi, by = 'GEOID')
+
# Visualize the NDI (Powell-Wiley) values (2016-2020 5-year ACS)
+## Maryland, Virginia, Washington, D.C., and West Virginia, U.S.A., counties
+## Continuous Index
+ggplot() +
+  geom_sf(
+    data = DMVW2020pw_county,
+    aes(fill = NDI),
+    size = 0.20,
+    color = 'white'
+  ) +
+  theme_minimal() +
+  scale_fill_viridis_c() +
+  labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2016-2020 estimates') +
+  ggtitle(
+    'Neighborhood Deprivation Index (Powell-Wiley)',
+    subtitle = 'DC, MD, VA, and WV counties as the referent'
+  )
+
+## Categorical Index
+
+### Rename '9-NDI not avail' level as NA for plotting
+DMVW2020pw_county$NDIQuintNA <-
+  factor(
+    replace(
+      as.character(DMVW2020pw_county$NDIQuint),
+      DMVW2020pw_county$NDIQuint == '9-NDI not avail',
+      NA
+    ),
+    c(levels(DMVW2020pw_county$NDIQuint)[-6], NA)
+  )
+
+ggplot() +
+  geom_sf(
+    data = DMVW2020pw_county,
+    aes(fill = NDIQuint),
+    size = 0.20,
+    color = 'white'
+  ) +
+  theme_minimal() +
+  scale_fill_viridis_d(guide = guide_legend(reverse = TRUE), na.value = 'grey80') +
+  labs(fill = 'Index (Categorical)', caption = 'Source: U.S. Census ACS 2016-2020 estimates') +
+  ggtitle(
+    'Neighborhood Deprivation Index (Powell-Wiley) Population-weighted Quintiles',
+    subtitle = 'DC, MD, VA, and WV counties as the referent'
+  )
+

Advanced Features

@@ -937,149 +994,203 @@

Imputing missing census variables

In the messer() and powell_wiley() functions, missing census characteristics can be imputed using the missing and impute arguments of the -pca() function in the psych package called -within the messer() and powell_wiley() -functions. Impute values using the logical imp argument -(currently only calls impute = "median" by default, which -assigns the median values of each missing census variable for a -geography).

-
powell_wiley2020DC <- ndi::powell_wiley(state = "DC", year = 2020) # without imputation
-powell_wiley2020DCi <- ndi::powell_wiley(state = "DC", year = 2020, imp = TRUE) # with imputation
-
-table(is.na(powell_wiley2020DC$ndi$NDI)) # n=13 tracts without NDI (Powell-Wiley) values
-table(is.na(powell_wiley2020DCi$ndi$NDI)) # n=0 tracts without NDI (Powell-Wiley) values
-
-# Obtain the 2020 census tracts from the "tigris" package
-tract2020DC <- tigris::tracts(state = "DC", year = 2020, cb = TRUE)
-
-# Join the NDI (Powell-Wiley) values to the census tract geometry
-DC2020pw <- dplyr::left_join(tract2020DC, powell_wiley2020DC$ndi, by = "GEOID")
-DC2020pw <- dplyr::left_join(DC2020pw, powell_wiley2020DCi$ndi, by = "GEOID", suffix = c("_nonimp", "_imp"))
-
# Visualize the NDI (Powell-Wiley) values (2016-2020 5-year ACS) for Washington, D.C., census tracts
-## Continuous Index
-ggplot2::ggplot() + 
-  ggplot2::geom_sf(data = DC2020pw, 
-                   ggplot2::aes(fill = NDI_nonimp),
-                   size = 0.2,
-                   color = "white") +
-  ggplot2::theme_minimal() + 
-  ggplot2::scale_fill_viridis_c() +
-  ggplot2::labs(fill = "Index (Continuous)",
-                caption = "Source: U.S. Census ACS 2016-2020 estimates") +
-  ggplot2::ggtitle("Neighborhood Deprivation Index (Powell-Wiley), Non-Imputed",
-                   subtitle = "DC census tracts as the referent")
-
-ggplot2::ggplot() + 
-  ggplot2::geom_sf(data = DC2020pw, 
-                   ggplot2::aes(fill = NDI_imp),
-                   size = 0.2,
-                   color = "white") +
-  ggplot2::theme_minimal() + 
-  ggplot2::scale_fill_viridis_c() +
-  ggplot2::labs(fill = "Index (Continuous)",
-                caption = "Source: U.S. Census ACS 2016-2020 estimates") +
-  ggplot2::ggtitle("Neighborhood Deprivation Index (Powell-Wiley), Imputed",
-                   subtitle = "DC census tracts as the referent")
-
-## Categorical Index
-### Rename "9-NDI not avail" level as NA for plotting
-DC2020pw$NDIQuintNA_nonimp <- factor(replace(as.character(DC2020pw$NDIQuint_nonimp), 
-                                            DC2020pw$NDIQuint_nonimp == "9-NDI not avail", NA),
-                                         c(levels(DC2020pw$NDIQuint_nonimp)[-6], NA))
-
-ggplot2::ggplot() + 
-  ggplot2::geom_sf(data = DC2020pw, 
-                   ggplot2::aes(fill = NDIQuintNA_nonimp),
-                   size = 0.2,
-                   color = "white") +
-  ggplot2::theme_minimal() + 
-  ggplot2::scale_fill_viridis_d(guide = ggplot2::guide_legend(reverse = TRUE),
-                                na.value = "grey80") +
-  ggplot2::labs(fill = "Index (Categorical)",
-                caption = "Source: U.S. Census ACS 2016-2020 estimates") +
-  ggplot2::ggtitle("Neighborhood Deprivation Index (Powell-Wiley) Quintiles, Non-Imputed",
-                   subtitle = "DC census tracts as the referent")
-
-### Rename "9-NDI not avail" level as NA for plotting
-DC2020pw$NDIQuintNA_imp <- factor(replace(as.character(DC2020pw$NDIQuint_imp), 
-                                            DC2020pw$NDIQuint_imp == "9-NDI not avail", NA),
-                                      c(levels(DC2020pw$NDIQuint_imp)[-6], NA))
-
-ggplot2::ggplot() + 
-  ggplot2::geom_sf(data = DC2020pw, 
-                   ggplot2::aes(fill = NDIQuintNA_imp),
-                   size = 0.2,
-                   color = "white") +
-  ggplot2::theme_minimal() + 
-  ggplot2::scale_fill_viridis_d(guide = ggplot2::guide_legend(reverse = TRUE),
-                                na.value = "grey80") +
-  ggplot2::labs(fill = "Index (Categorical)",
-                caption = "Source: U.S. Census ACS 2016-2020 estimates") +
-  ggplot2::ggtitle("Neighborhood Deprivation Index (Powell-Wiley) Quintiles, Imputed",
-                   subtitle = "DC census tracts as the referent")
-

+pca() function in the psych +package called within the messer() and +powell_wiley() functions. Impute values using the logical +imp argument (currently only calls +impute = 'median' by default, which assigns the median +values of each missing census variable for a geography).

+
powell_wiley2020DC <- powell_wiley(state = 'DC', year = 2020) # without imputation
+powell_wiley2020DCi <- powell_wiley(state = 'DC', year = 2020, imp = TRUE) # with imputation
+
+table(is.na(powell_wiley2020DC$ndi$NDI)) # n=13 tracts without NDI (Powell-Wiley) values
+table(is.na(powell_wiley2020DCi$ndi$NDI)) # n=0 tracts without NDI (Powell-Wiley) values
+
+# Obtain the 2020 census tracts from the 'tigris' package
+tract2020DC <- tracts(state = 'DC', year = 2020, cb = TRUE)
+
+# Join the NDI (Powell-Wiley) values to the census tract geometry
+DC2020pw <- tract2020DC %>%
+  left_join(powell_wiley2020DC$ndi, by = 'GEOID')
+DC2020pw <- DC2020pw %>%
+  left_join(powell_wiley2020DCi$ndi, by = 'GEOID', suffix = c('_nonimp', '_imp'))
+
# Visualize the NDI (Powell-Wiley) values (2016-2020 5-year ACS) for 
+## Washington, D.C., census tracts
+## Continuous Index
+ggplot() +
+  geom_sf(
+    data = DC2020pw,
+    aes(fill = NDI_nonimp),
+    size = 0.2,
+    color = 'white'
+  ) +
+  theme_minimal() +
+  scale_fill_viridis_c() +
+  labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2016-2020 estimates') +
+  ggtitle(
+    'Neighborhood Deprivation Index (Powell-Wiley), Non-Imputed',
+    subtitle = 'DC census tracts as the referent'
+  )
+
+ggplot() +
+  geom_sf(
+    data = DC2020pw,
+    aes(fill = NDI_imp),
+    size = 0.2,
+    color = 'white'
+  ) +
+  theme_minimal() +
+  scale_fill_viridis_c() +
+  labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2016-2020 estimates') +
+  ggtitle(
+    'Neighborhood Deprivation Index (Powell-Wiley), Imputed',
+    subtitle = 'DC census tracts as the referent'
+  )
+
+## Categorical Index
+### Rename '9-NDI not avail' level as NA for plotting
+DC2020pw$NDIQuintNA_nonimp <-
+  factor(
+    replace(
+      as.character(DC2020pw$NDIQuint_nonimp),
+      DC2020pw$NDIQuint_nonimp == '9-NDI not avail',
+      NA
+    ),
+    c(levels(DC2020pw$NDIQuint_nonimp)[-6], NA)
+  )
+
+ggplot() +
+  geom_sf(
+    data = DC2020pw,
+    aes(fill = NDIQuintNA_nonimp),
+    size = 0.2,
+    color = 'white'
+  ) +
+  theme_minimal() +
+  scale_fill_viridis_d(guide = guide_legend(reverse = TRUE), na.value = 'grey80') +
+  labs(fill = 'Index (Categorical)', caption = 'Source: U.S. Census ACS 2016-2020 estimates') +
+  ggtitle(
+    'Neighborhood Deprivation Index (Powell-Wiley) Quintiles, Non-Imputed',
+    subtitle = 'DC census tracts as the referent'
+  )
+
+### Rename '9-NDI not avail' level as NA for plotting
+DC2020pw$NDIQuintNA_imp <-
+  factor(
+    replace(
+      as.character(DC2020pw$NDIQuint_imp),
+      DC2020pw$NDIQuint_imp == '9-NDI not avail',
+      NA
+    ),
+    c(levels(DC2020pw$NDIQuint_imp)[-6], NA)
+  )
+
+ggplot() +
+  geom_sf(
+    data = DC2020pw,
+    aes(fill = NDIQuintNA_imp),
+    size = 0.2,
+    color = 'white'
+  ) +
+  theme_minimal() +
+  scale_fill_viridis_d(guide = guide_legend(reverse = TRUE), na.value = 'grey80') +
+  labs(fill = 'Index (Categorical)', caption = 'Source: U.S. Census ACS 2016-2020 estimates') +
+  ggtitle(
+    'Neighborhood Deprivation Index (Powell-Wiley) Quintiles, Imputed',
+    subtitle = 'DC census tracts as the referent'
+  )
+

Assign the referent (U.S.-Standardized Metric)

-

To conduct a contiguous US-standardized index, compute an NDI for all -states as in the example below that replicates the nationally -standardized NDI (Powell-Wiley) values (2013-2017 ACS-5) found in Slotman et +

To conduct a contiguous US-standardized index, compute an +NDI for all states as in the example below that replicates the +nationally standardized NDI (Powell-Wiley) values (2013-2017 +ACS-5) found in Slotman et al. (2022) and available from a GIS Portal for Cancer Research website. To replicate the nationally standardized -NDI (Powell-Wiley) values (2006-2010 ACS-5) found in Andrews et +NDI (Powell-Wiley) values (2006-2010 ACS-5) found in Andrews et al. (2020) change the year argument to 2010 (i.e., year = 2010).

-
us <- tigris::states()
-n51 <- c("Commonwealth of the Northern Mariana Islands", "Guam", "American Samoa",
-         "Puerto Rico", "United States Virgin Islands")
-y51 <- us$STUSPS[!(us$NAME %in% n51)]
-
-start_time <- Sys.time() # record start time
-powell_wiley2017US <- ndi::powell_wiley(state = y51, year = 2017)
-end_time <- Sys.time() # record end time
-time_srr <- end_time - start_time # Calculate run time
-
ggplot2::ggplot(powell_wiley2017US$ndi, 
-                ggplot2::aes(x = NDI)) +
-  ggplot2::geom_histogram(color = "black",
-                          fill = "white") + 
-  ggplot2::theme_minimal() +
-  ggplot2::ggtitle("Histogram of US-standardized NDI (Powell-Wiley) values (2013-2017)",
-                   subtitle = "U.S. census tracts as the referent (including AK, HI, and DC)")
+
us <- states()
+n51 <- c(
+  'Commonwealth of the Northern Mariana Islands',
+  'Guam',
+  'American Samoa',
+  'Puerto Rico',
+  'United States Virgin Islands'
+)
+y51 <- us$STUSPS[!(us$NAME %in% n51)]
+
+start_time <- Sys.time() # record start time
+powell_wiley2017US <- powell_wiley(state = y51, year = 2017)
+end_time <- Sys.time() # record end time
+time_srr <- end_time - start_time # Calculate run time
+
ggplot(powell_wiley2017US$ndi, aes(x = NDI)) +
+  geom_histogram(color = 'black', fill = 'white') +
+  theme_minimal() +
+  ggtitle(
+    'Histogram of US-standardized NDI (Powell-Wiley) values (2013-2017)',
+    subtitle = 'U.S. census tracts as the referent (including AK, HI, and DC)'
+  )

-

The process to compute a US-standardized NDI (Powell-Wiley) took -about 2.6 minutes to run on a machine with the features listed at the -end of the vignette.

+

The process to compute a US-standardized NDI (Powell-Wiley) +took about 24.7 minutes to run on a machine with the features listed at +the end of the vignette.

Additional metrics of socio-economic deprivation and disparity

-

Since version v0.1.1, the ndi package can compute -additional metrics of socio-economic deprivation and disparity beyond -neighborhood deprivation indices, including:

+

Since version v0.1.1, the ndi package +can compute additional metrics of socio-economic deprivation and +disparity beyond neighborhood deprivation indices with data from the +ACS-5, including:

  1. anthopolos() function that computes the Racial -Isolation Index (RI) based on Anthopolos et -al. (2011) with data from the ACS-5.
  2. +Isolation Index (RI) based on Anthopolos et +al. (2011)
  3. bravo() function that computes the Educational -Isolation Index (EI) based on Bravo et -al. (2021) with data from the ACS-5.
  4. -
  5. gini() function that retrieves the Gini Index based on -Gini (1921) from the -ACS-5.
  6. +Isolation Index (EI) based on Bravo et +al. (2021) +
  7. gini() function that retrieves the Gini Index +(G) based on Gini +(1921)
  8. krieger() function that computes the Index of -Concentration at the Extremes based on based on Feldman et -al. (2015) and Krieger et -al. (2016) with data from the ACS-5. 5. duncan() -function that computes the Dissimilarity Index based on on Duncan & Duncan (1955) -with data from the ACS-5.
  9. +Concentration at the Extremes (ICE) based on Feldman et +al. (2015) and Krieger et +al. (2016) +
  10. duncan() function that computes the Dissimilarity Index +(D) based on Duncan +& Duncan (1955)
  11. atkinson() function that computes the Atkinson Index -based on on Atkinson (1970) -with data from the ACS-5.
  12. +(A) based on Atkinson +(1970) +
  13. bell() function that computes the aspatial +racial/ethnic Interaction Index (xPy*) based on Shevky & +Williams (1949; ISBN-13:978-0-837-15637-8) and Bell (1954)
  14. +
  15. white() function that computes the aspatial +racial/ethnic Correlation Ratio (V) based on Bell (1954) and White (1986)
  16. +
  17. sudano() function that computes the aspatial +racial/ethnic Location Quotient (LQ) based on Merton (1939) and Sudano et +al. (2013)
  18. +
  19. bemanian_beyer() function that computes the aspatial +racial/ethnic Local Exposure and Isolation (LEx/Is) metric +based on Bemanian & +Beyer (2017)
  20. +
  21. hoover() function that computes the aspatial +racial/ethnic Delta (DEL) based on Hoover (1941) and +Duncan et al. (1961; LC:60007089)
  22. +
  23. white_blau() function that computes an index of spatial +proximity (SP) based on White (1986) and Blau (1977; +ISBN-13:978-0-029-03660-0)
  24. +
  25. lieberson() function that computes the aspatial +racial/ethnic Isolation Index (xPx*) based on Lieberson (1981; +ISBN-13:978-1-032-53884-6) and Bell (1954)
-

Compute Racial Isolation Index (RI)

-

Compute the RI (Anthopolos) values (2006-2010 5-year ACS) for North -Carolina, U.S.A., census tracts. This metric is based on Anthopolos et +

Compute Racial Isolation Index (RI)

+

Compute the spatial RI values (2006-2010 5-year ACS) for +North Carolina, U.S.A., census tracts. This metric is based on Anthopolos et al. (2011) that assessed the racial isolation of the population that identifies as non-Hispanic or Latino, Black or African American alone. Multiple racial/ethnic subgroups are available in the @@ -1207,90 +1318,107 @@

Compute Racial Isolation Index (RI)

A census geography (and its neighbors) that has nearly all of its population who identify with the specified race/ethnicity subgroup(s) (e.g., Not Hispanic or Latino, Black or African American alone) will -have an RI value close to 1. In contrast, a census geography (and its -neighbors) that is nearly none of its population who identify with the -specified race/ethnicity subgroup(s) (e.g., not Not Hispanic or Latino, -Black or African American alone) will have an RI value close to 0.

-
anthopolos2010NC <- ndi::anthopolos(state = "NC", year = 2010, subgroup = "NHoLB")
-
-# Obtain the 2010 census tracts from the "tigris" package
-tract2010NC <- tigris::tracts(state = "NC", year = 2010, cb = TRUE)
-# Remove first 9 characters from GEOID for compatibility with tigris information
-tract2010NC$GEOID <- substring(tract2010NC$GEO_ID, 10) 
-
-# Obtain the 2010 counties from the "tigris" package
-county2010NC <- tigris::counties(state = "NC", year = 2010, cb = TRUE)
-
-# Join the RI (Anthopolos) values to the census tract geometry
-NC2010anthopolos <- dplyr::left_join(tract2010NC, anthopolos2010NC$ri, by = "GEOID")
-
# Visualize the RI (Anthopolos) values (2006-2010 5-year ACS) for North Carolina, U.S.A., census tracts 
-ggplot2::ggplot() + 
-  ggplot2::geom_sf(data = NC2010anthopolos, 
-                   ggplot2::aes(fill = RI),
-                   size = 0.05,
-                   color = "transparent") +
-   ggplot2::geom_sf(data = county2010NC,
-                   fill = "transparent", 
-                   color = "white",
-                   size = 0.2) +
-  ggplot2::theme_minimal() +
-  ggplot2::scale_fill_viridis_c() +
-  ggplot2::labs(fill = "Index (Continuous)",
-                caption = "Source: U.S. Census ACS 2006-2010 estimates") +
-  ggplot2::ggtitle("Racial Isolation Index (Anthopolos), non-Hispanic Black",
-                   subtitle = "NC census tracts (not corrected for edge effects)")
+have an RI value close to 1. In contrast, a census geography +(and its neighbors) that has nearly none of its population who identify +with the specified race/ethnicity subgroup(s) (e.g., not Not Hispanic or +Latino, Black or African American alone) will have an RI value +close to 0.

+
anthopolos2010NC <- anthopolos(state = 'NC', year = 2010, subgroup = 'NHoLB')
+
+# Obtain the 2010 census tracts from the 'tigris' package
+tract2010NC <- tracts(state = 'NC', year = 2010, cb = TRUE)
+# Remove first 9 characters from GEOID for compatibility with tigris information
+tract2010NC$GEOID <- substring(tract2010NC$GEO_ID, 10) 
+
+# Obtain the 2010 counties from the 'tigris' package
+county2010NC <- counties(state = 'NC', year = 2010, cb = TRUE)
+
+# Join the RI values to the census tract geometry
+NC2010anthopolos <- tract2010NC %>%
+  left_join(anthopolos2010NC$ri, by = 'GEOID')
+
# Visualize the RI values (2006-2010 5-year ACS) for North Carolina, U.S.A., census tracts 
+ggplot() +
+  geom_sf(
+    data = NC2010anthopolos,
+    aes(fill = RI),
+    size = 0.05,
+    color = 'transparent'
+  ) +
+  geom_sf(
+    data = county2010NC,
+    fill = 'transparent',
+    color = 'white',
+    size = 0.2
+  ) +
+  theme_minimal() +
+  scale_fill_viridis_c() +
+  labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2006-2010 estimates') +
+  ggtitle(
+    'Racial Isolation Index (Anthopolos), non-Hispanic Black',
+    subtitle = 'NC census tracts (not corrected for edge effects)'
+  )

-

The current version of the ndi package does not correct -for edge effects (e.g., census geographies along the specified spatial -extent border, coastline, or U.S.-Mexico / U.S.-Canada border) may have -few neighboring census geographies, and RI values in these census -geographies may be unstable. A stop-gap solution for the former source -of edge effect is to compute the RI for neighboring census geographies -(i.e., the states bordering a study area of interest) and then use the -estimates of the study area of interest.

-
# Compute RI for all census tracts in neighboring states
-anthopolos2010GNSTV <- ndi::anthopolos(state = c("GA", "NC", "SC", "TN", "VA"),
-                                     year = 2010, subgroup = "NHoLB")
-
-# Crop to only North Carolina, U.S.A. census tracts
-anthopolos2010NCe <- anthopolos2010GNSTV$ri[anthopolos2010GNSTV$ri$GEOID %in% anthopolos2010NC$ri$GEOID, ]
-
-# Obtain the 2010 census tracts from the "tigris" package
-tract2010NC <- tigris::tracts(state = "NC", year = 2010, cb = TRUE)
-# Remove first 9 characters from GEOID for compatibility with tigris information
-tract2010NC$GEOID <- substring(tract2010NC$GEO_ID, 10) 
-
-# Obtain the 2010 counties from the "tigris" package
-county2010NC <- tigris::counties(state = "NC", year = 2010, cb = TRUE)
-
-# Join the RI (Anthopolos) values to the census tract geometry
-edgeNC2010anthopolos <- dplyr::left_join(tract2010NC, anthopolos2010NCe, by = "GEOID")
-
# Visualize the RI (Anthopolos) values (2006-2010 5-year ACS) for North Carolina, U.S.A., census tracts 
-ggplot2::ggplot() + 
-  ggplot2::geom_sf(data = edgeNC2010anthopolos, 
-                   ggplot2::aes(fill = RI),
-                   size = 0.05,
-                   color = "transparent") +
-   ggplot2::geom_sf(data = county2010NC,
-                   fill = "transparent", 
-                   color = "white",
-                   size = 0.2) +
-  ggplot2::theme_minimal() +
-  ggplot2::scale_fill_viridis_c() +
-  ggplot2::labs(fill = "Index (Continuous)",
-                caption = "Source: U.S. Census ACS 2006-2010 estimates") +
-  ggplot2::ggtitle("Racial Isolation Index (Anthopolos), non-Hispanic Black",
-                   subtitle = "NC census tracts (corrected for interstate edge effects)")
+

The current version of the ndi package +does not correct for edge effects: census geographies along the +specified spatial extent border, coastline, or U.S.-Mexico / U.S.-Canada +border may have few neighboring census geographies, and RI +values in these census geographies may be unstable. A stop-gap solution +for the former source of edge effect is to compute the RI for +neighboring census geographies (i.e., the states bordering a study area +of interest) and then use the estimates of the study area of +interest.

+
# Compute RI for all census tracts in neighboring states
+anthopolos2010GNSTV <- anthopolos(
+  state = c('GA', 'NC', 'SC', 'TN', 'VA'),
+  year = 2010,
+  subgroup = 'NHoLB'
+)
+
+# Crop to only North Carolina, U.S.A. census tracts
+anthopolos2010NCe <- anthopolos2010GNSTV$ri[anthopolos2010GNSTV$ri$GEOID %in% 
+                                              anthopolos2010NC$ri$GEOID, ]
+
+# Obtain the 2010 census tracts from the 'tigris' package
+tract2010NC <- tracts(state = 'NC', year = 2010, cb = TRUE)
+# Remove first 9 characters from GEOID for compatibility with tigris information
+tract2010NC$GEOID <- substring(tract2010NC$GEO_ID, 10) 
+
+# Obtain the 2010 counties from the 'tigris' package
+county2010NC <- counties(state = 'NC', year = 2010, cb = TRUE)
+
+# Join the RI values to the census tract geometry
+edgeNC2010anthopolos <- tract2010NC %>% 
+  left_join(anthopolos2010NCe, by = 'GEOID')
+
# Visualize the RI values (2006-2010 5-year ACS) for North Carolina, U.S.A., census tracts 
+ggplot() +
+  geom_sf(
+    data = edgeNC2010anthopolos,
+    aes(fill = RI),
+    size = 0.05,
+    color = 'transparent'
+  ) +
+  geom_sf(
+    data = county2010NC,
+    fill = 'transparent',
+    color = 'white',
+    size = 0.2
+  ) +
+  theme_minimal() +
+  scale_fill_viridis_c() +
+  labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2006-2010 estimates') +
+  ggtitle(
+    'Racial Isolation Index (Anthopolos), non-Hispanic Black',
+    subtitle = 'NC census tracts (corrected for interstate edge effects)'
+  )

-

Compute Educational Isolation Index (EI)

-

Compute the EI (Bravo) values (2006-2010 5-year ACS) for North -Carolina, U.S.A., census tracts. This metric is based on Bravo et -al. (2021) that assessed the educational isolation of the population -without a four-year college degree. Multiple educational attainment -categories are available in the bravo() function, -including:

+

Compute Educational Isolation Index (EI)

+

Compute the spatial EI (Bravo) values (2006-2010 5-year ACS) +for Oklahoma, U.S.A., census tracts. This metric is based on Bravo et al. (2021) +that assessed the educational isolation of the population without a +four-year college degree. Multiple educational attainment categories are +available in the bravo() function, including:

@@ -1332,103 +1460,116 @@

Compute Educational Isolation Index (EI)

-

Note: The ACS-5 data (2005-2009) uses the “B15002” question.

+

Note: The ACS-5 data (2005-2009) uses the ‘B15002’ question.

A census geography (and its neighbors) that has nearly all of its population with the specified educational attainment category (e.g., a -four-year college degree or more) will have an EI value close to 1. In -contrast, a census geography (and its neighbors) that is nearly none of -its population with the specified educational attainment category (e.g., -with a four-year college degree) will have an EI value close to 0.

-
bravo2010NC <- ndi::bravo(state = "NC", year = 2010, subgroup = c("LtHS", "HSGiE", "SCoAD"))
-
-# Obtain the 2010 census tracts from the "tigris" package
-tract2010NC <- tigris::tracts(state = "NC", year = 2010, cb = TRUE)
-# Remove first 9 characters from GEOID for compatibility with tigris information
-tract2010NC$GEOID <- substring(tract2010NC$GEO_ID, 10) 
-
-# Obtain the 2010 counties from the "tigris" package
-county2010NC <- tigris::counties(state = "NC", year = 2010, cb = TRUE)
-
-# Join the RI (Bravo) values to the census tract geometry
-NC2010bravo <- dplyr::left_join(tract2010NC, bravo2010NC$ei, by = "GEOID")
-
# Visualize the RI (Bravo) values (2006-2010 5-year ACS) for North Carolina, U.S.A., census tracts 
-ggplot2::ggplot() + 
-  ggplot2::geom_sf(data = NC2010bravo, 
-                   ggplot2::aes(fill = EI),
-                   size = 0.05,
-                   color = "transparent") +
-   ggplot2::geom_sf(data = county2010NC,
-                   fill = "transparent", 
-                   color = "white",
-                   size = 0.2) +
-  ggplot2::theme_minimal() +
-  ggplot2::scale_fill_viridis_c() +
-  ggplot2::labs(fill = "Index (Continuous)",
-                caption = "Source: U.S. Census ACS 2006-2010 estimates") +
-  ggplot2::ggtitle("Educational Isolation Index (Bravo), without a four-year college degree",
-                   subtitle = "NC census tracts (not corrected for edge effects)")
-

+four-year college degree or more) will have an EI (Bravo) value +close to 1. In contrast, a census geography (and its neighbors) that has +nearly none of its population with the specified educational attainment +category (e.g., with a four-year college degree) will have an +EI (Bravo) value close to 0.

+
bravo2010OK <- bravo(state = 'OK', year = 2010, subgroup = c('LtHS', 'HSGiE', 'SCoAD'))
+
+# Obtain the 2010 census tracts from the 'tigris' package
+tract2010OK <- tracts(state = 'OK', year = 2010, cb = TRUE)
+# Remove first 9 characters from GEOID for compatibility with tigris information
+tract2010OK$GEOID <- substring(tract2010OK$GEO_ID, 10) 
+
+# Obtain the 2010 counties from the 'tigris' package
+county2010OK <- counties(state = 'OK', year = 2010, cb = TRUE)
+
+# Join the EI (Bravo) values to the census tract geometry
+OK2010bravo <- tract2010OK %>%
+  left_join(bravo2010OK$ei, by = 'GEOID')
+
# Visualize the EI (Bravo) values (2006-2010 5-year ACS) for Oklahoma, U.S.A., census tracts 
+ggplot() +
+  geom_sf(
+    data = OK2010bravo,
+    aes(fill = EI),
+    size = 0.05,
+    color = 'transparent'
+  ) +
+  geom_sf(
+    data = county2010OK,
+    fill = 'transparent',
+    color = 'white',
+    size = 0.2
+  ) +
+  theme_minimal() +
+  scale_fill_viridis_c(limits = c(0, 1)) +
+  labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2006-2010 estimates') +
+  ggtitle(
+    'Educational Isolation Index (Bravo), without a four-year college degree',
+    subtitle = 'OK census tracts (not corrected for edge effects)'
+  )
+

One source of edge effect can be corrected in the same manner as shown for -the RI (Anthopolos) metric.

+the RI metric.

-
-

Retrieve the Gini Index

-

Retrieve the Gini Index values (2006-2010 5-year ACS) for North -Carolina, U.S.A., census tracts. This metric is based on Gini (1921), and the -gini() function retrieves the estimate from the ACS-5.

-

According to the U.S. -Census Bureau: “The Gini Index is a summary measure of income +

+

Retrieve the Gini Index (G)

+

Retrieve the aspatial Gini Index (G) values (2006-2010 +5-year ACS) for Massachusetts, U.S.A., census tracts. This metric is +based on Gini (1921), and +the gini() function retrieves the estimate from the +ACS-5.

+

According to the U.S. +Census Bureau: ‘The Gini Index is a summary measure of income inequality. The Gini coefficient incorporates the detailed shares data into a single statistic, which summarizes the dispersion of income across the entire income distribution. The Gini coefficient ranges from 0, indicating perfect equality (where everyone receives an equal share), to 1, perfect inequality (where only one recipient or group of -recipients receives all the income). The Gini is based on the difference -between the Lorenz curve (the observed cumulative income distribution) -and the notion of a perfectly equal income distribution.”

-
gini2010NC <- ndi::gini(state = "NC", year = 2010)
-
-# Obtain the 2010 census tracts from the "tigris" package
-tract2010NC <- tigris::tracts(state = "NC", year = 2010, cb = TRUE)
-# Remove first 9 characters from GEOID for compatibility with tigris information
-tract2010NC$GEOID <- substring(tract2010NC$GEO_ID, 10) 
-
-# Obtain the 2010 counties from the "tigris" package
-county2010NC <- tigris::counties(state = "NC", year = 2010, cb = TRUE)
-
-# Join the Gini Index values to the census tract geometry
-NC2010gini <- dplyr::left_join(tract2010NC, gini2010NC$gini, by = "GEOID")
-
# Visualize the Gini Index values (2006-2010 5-year ACS) for North Carolina, U.S.A., census tracts 
-ggplot2::ggplot() + 
-  ggplot2::geom_sf(data = NC2010gini, 
-                   ggplot2::aes(fill = gini),
-                   size = 0.05,
-                   color = "transparent") +
-   ggplot2::geom_sf(data = county2010NC,
-                   fill = "transparent", 
-                   color = "white",
-                   size = 0.2) +
-  ggplot2::theme_minimal() +
-  ggplot2::scale_fill_viridis_c() +
-  ggplot2::labs(fill = "Index (Continuous)",
-                caption = "Source: U.S. Census ACS 2006-2010 estimates") +
-  ggplot2::ggtitle("Gini Index",
-                   subtitle = "NC census tracts")
-

+recipients receives all the income). G is based on the +difference between the Lorenz curve (the observed cumulative income +distribution) and the notion of a perfectly equal income +distribution.’

+
gini2010MA <- gini(state = 'MA', year = 2010)
+
+# Obtain the 2010 census tracts from the 'tigris' package
+tract2010MA <- tracts(state = 'MA', year = 2010, cb = TRUE)
+# Remove first 9 characters from GEOID for compatibility with tigris information
+tract2010MA$GEOID <- substring(tract2010MA$GEO_ID, 10) 
+
+# Obtain the 2010 counties from the 'tigris' package
+county2010MA <- counties(state = 'MA', year = 2010, cb = TRUE)
+
+# Join the G (Gini) values to the census tract geometry
+MA2010gini <- tract2010MA %>%
+  left_join(gini2010MA$g, by = 'GEOID')
+
# Visualize the G (Gini) values (2006-2010 5-year ACS) for Massachusetts, U.S.A., census tracts 
+ggplot() +
+  geom_sf(
+    data = MA2010gini,
+    aes(fill = G),
+    size = 0.05,
+    color = 'transparent'
+  ) +
+  geom_sf(
+    data = county2010MA,
+    fill = 'transparent',
+    color = 'white',
+    size = 0.2
+  ) +
+  theme_minimal() +
+  scale_fill_viridis_c() +
+  labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2006-2010 estimates') +
+  ggtitle('Gini Index', subtitle = 'MA census tracts')
+

-

Index of Concentration at the Extremes (ICE)

-

Compute the Index of Concentration at the Extremes values (2006-2010 -5-year ACS) for Wayne County, Michigan, U.S.A., census tracts. Wayne -County is the home of Detroit, Michigan, a highly segregated city in the -U.S. This metric is based on Feldman et -al. (2015) and Krieger et +

Index of Concentration at the Extremes (ICE)

+

Compute the aspatial Index of Concentration at the Extremes values +(2006-2010 5-year ACS) for Wayne County, Michigan, U.S.A., census +tracts. Wayne County is the home of Detroit, Michigan, a highly +segregated city in the U.S. This metric is based on Feldman et +al. (2015) and Krieger et al. (2016) who expanded the metric designed by Massey in a chapter -of Booth & -Crouter (2001) initially designed for residential segregation. The -krieger() function computes five ICE metrics using the -following ACS-5 groups:

+of Booth & Crouter +(2001) initially designed for residential segregation. The +krieger() function computes five ICE metrics using +the following ACS-5 groups:

@@ -1438,133 +1579,176 @@

Index of Concentration at the Extremes (ICE)

- + - + - + - + - + - +
ACS table groupICE metricICE metric Comparison
B19001Income, “ICE_inc”Income, ‘ICE_inc’ 80th income percentile vs. 20th income percentile
B15002Education, “ICE_edu”Education, ‘ICE_edu’ less than high school vs. four-year college degree or more
B03002Race/Ethnicity, “ICE_rewb”Race/Ethnicity, ‘ICE_rewb’ white non-Hispanic vs. black non-Hispanic
B19001 & B19001B & B19001HIncome and race/ethnicity combined, “ICE_wbinc”Income and race/ethnicity combined, ‘ICE_wbinc’ white non-Hispanic in 80th income percentile vs. black alone (including Hispanic) in 20th income percentile
B19001 & B19001HIncome and race/ethnicity combined, “ICE_wpcinc”Income and race/ethnicity combined, ‘ICE_wpcinc’ white non-Hispanic in 80th income percentile vs. white non-Hispanic in 20th income percentile
-

ICE metrics can range in value from −1 (most deprived) to 1 (most -privileged). A value of 0 can thus represent two possibilities: (1) none -of the residents are in the most privileged or most deprived categories, -or (2) an equal number of persons are in the most privileged and most -deprived categories, and in both cases indicates that the area is not -dominated by extreme concentrations of either of the two groups.

-
ice2020WC <- krieger(state = "MI", county = "Wayne", year = 2010)
-
-# Obtain the 2010 census tracts from the "tigris" package
-tract2010WC <- tigris::tracts(state = "MI", county = "Wayne", year = 2010, cb = TRUE)
-# Remove first 9 characters from GEOID for compatibility with tigris information
-tract2010WC$GEOID <- substring(tract2010WC$GEO_ID, 10) 
-
-# Join the ICEs (Krieger) values to the census tract geometry
-ice2020WC <- dplyr::left_join(tract2010WC, ice2020WC$ice, by = "GEOID")
-
# Plot ICE for Income
-ggplot2::ggplot() + 
-  ggplot2::geom_sf(data = ice2020WC, 
-                   ggplot2::aes(fill = ICE_inc),
-                   color = "white",
-                   size = 0.05) +
-  ggplot2::theme_bw() + 
-  ggplot2::scale_fill_gradient2(low = "#998ec3", mid = "#f7f7f7", high = "#f1a340", limits = c(-1,1)) +
-  ggplot2::labs(fill = "Index (Continuous)",
-                caption = "Source: U.S. Census ACS 2006-2010 estimates")+
-  ggplot2::ggtitle("Index of Concentration at the Extremes\nIncome (Krieger)",
-                   subtitle = "80th income percentile vs. 20th income percentile")
-
-# Plot ICE for Education
-ggplot2::ggplot() + 
-  ggplot2::geom_sf(data = ice2020WC, 
-                   ggplot2::aes(fill = ICE_edu),
-                   color = "white",
-                   size = 0.05) +
-  ggplot2::theme_bw() + 
-  ggplot2::scale_fill_gradient2(low = "#998ec3", mid = "#f7f7f7", high = "#f1a340", limits = c(-1,1)) +
-  ggplot2::labs(fill = "Index (Continuous)",
-                caption = "Source: U.S. Census ACS 2006-2010 estimates")+
-  ggplot2::ggtitle("Index of Concentration at the Extremes\nEducation (Krieger)",
-                   subtitle = "less than high school vs. four-year college degree or more")
-
-# Plot ICE for Race/Ethnicity
-ggplot2::ggplot() + 
-  ggplot2::geom_sf(data = ice2020WC, 
-                   ggplot2::aes(fill = ICE_rewb),
-                   color = "white",
-                   size = 0.05) +
-  ggplot2::theme_bw() + 
-  ggplot2::scale_fill_gradient2(low = "#998ec3", mid = "#f7f7f7", high = "#f1a340", limits = c(-1, 1)) +
-  ggplot2::labs(fill = "Index (Continuous)",
-                caption = "Source: U.S. Census ACS 2006-2010 estimates")+
-  ggplot2::ggtitle("Index of Concentration at the Extremes\nRace/Ethnicity (Krieger)",
-                   subtitle = "white non-Hispanic vs. black non-Hispanic")
-
-# Plot ICE for Income and Race/Ethnicity Combined
-## white non-Hispanic in 80th income percentile vs. black (including Hispanic) in 20th income percentile
-ggplot2::ggplot() + 
-  ggplot2::geom_sf(data = ice2020WC, 
-                   ggplot2::aes(fill = ICE_wbinc),
-                   color = "white",
-                   size = 0.05) +
-  ggplot2::theme_bw() + 
-  ggplot2::scale_fill_gradient2(low = "#998ec3", mid = "#f7f7f7", high = "#f1a340", limits = c(-1, 1)) +
-  ggplot2::labs(fill = "Index (Continuous)",
-                caption = "Source: U.S. Census ACS 2006-2010 estimates")+
-  ggplot2::ggtitle("Index of Concentration at the Extremes\nIncome & race/ethnicity combined (Krieger)",
-                   subtitle = "white non-Hispanic in 80th inc ptcl vs. black alone in 20th inc pctl")
-
-# Plot ICE for Income and Race/Ethnicity Combined
-## white non-Hispanic in 80th income percentile vs. white non-Hispanic in 20th income percentile
-ggplot2::ggplot() + 
-  ggplot2::geom_sf(data = ice2020WC, 
-                   ggplot2::aes(fill = ICE_wpcinc),
-                   color = "white",
-                   size = 0.05) +
-  ggplot2::theme_bw() + 
-  ggplot2::scale_fill_gradient2(low = "#998ec3", mid = "#f7f7f7", high = "#f1a340", limits = c(-1, 1)) +
-  ggplot2::labs(fill = "Index (Continuous)",
-                caption = "Source: U.S. Census ACS 2006-2010 estimates")+
-  ggplot2::ggtitle("Index of Concentration at the Extremes\nIncome & race/ethnicity combined (Krieger)",
-                   subtitle = "white non-Hispanic (WNH) in 80th inc pctl vs. WNH in 20th inc pctl")
+

ICE metrics can range in value from −1 (most deprived) to 1 +(most privileged). A value of 0 can thus represent two possibilities: +(1) none of the residents are in the most privileged or most deprived +categories, or (2) an equal number of persons are in the most privileged +and most deprived categories, and in both cases indicates that the area +is not dominated by extreme concentrations of either of the two +groups.

+
ice2020WC <- krieger(state = 'MI', county = 'Wayne', year = 2010)
+
+# Obtain the 2010 census tracts from the 'tigris' package
+tract2010WC <- tracts(state = 'MI', county = 'Wayne', year = 2010, cb = TRUE)
+# Remove first 9 characters from GEOID for compatibility with tigris information
+tract2010WC$GEOID <- substring(tract2010WC$GEO_ID, 10) 
+
+# Join the ICE values to the census tract geometry
+ice2020WC <- tract2010WC %>%
+  left_join(ice2020WC$ice, by = 'GEOID')
+
# Plot ICE for Income
+ggplot() +
+  geom_sf(
+    data = ice2020WC,
+    aes(fill = ICE_inc),
+    color = 'white',
+    size = 0.05
+  ) +
+  theme_bw() +
+  scale_fill_gradient2(
+    low = '#998ec3',
+    mid = '#f7f7f7',
+    high = '#f1a340',
+    limits = c(-1, 1)
+  ) +
+  labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2006-2010 estimates') +
+  ggtitle(
+    'Index of Concentration at the Extremes\nIncome (Krieger)',
+    subtitle = '80th income percentile vs. 20th income percentile'
+  )
+
+# Plot ICE for Education
+ggplot() +
+  geom_sf(
+    data = ice2020WC,
+    aes(fill = ICE_edu),
+    color = 'white',
+    size = 0.05
+  ) +
+  theme_bw() +
+  scale_fill_gradient2(
+    low = '#998ec3',
+    mid = '#f7f7f7',
+    high = '#f1a340',
+    limits = c(-1, 1)
+  ) +
+  labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2006-2010 estimates') +
+  ggtitle(
+    'Index of Concentration at the Extremes\nEducation (Krieger)',
+    subtitle = 'less than high school vs. four-year college degree or more'
+  )
+
+# Plot ICE for Race/Ethnicity
+ggplot() +
+  geom_sf(
+    data = ice2020WC,
+    aes(fill = ICE_rewb),
+    color = 'white',
+    size = 0.05
+  ) +
+  theme_bw() +
+  scale_fill_gradient2(
+    low = '#998ec3',
+    mid = '#f7f7f7',
+    high = '#f1a340',
+    limits = c(-1, 1)
+  ) +
+  labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2006-2010 estimates') +
+  ggtitle(
+    'Index of Concentration at the Extremes\nRace/Ethnicity (Krieger)',
+    subtitle = 'white non-Hispanic vs. black non-Hispanic'
+  )
+
+# Plot ICE for Income and Race/Ethnicity Combined
+## white non-Hispanic in 80th income percentile vs. 
+## black (including Hispanic) in 20th income percentile
+ggplot() +
+  geom_sf(
+    data = ice2020WC,
+    aes(fill = ICE_wbinc),
+    color = 'white',
+    size = 0.05
+  ) +
+  theme_bw() +
+  scale_fill_gradient2(
+    low = '#998ec3',
+    mid = '#f7f7f7',
+    high = '#f1a340',
+    limits = c(-1, 1)
+  ) +
+  labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2006-2010 estimates') +
+  ggtitle(
+    'Index of Concentration at the Extremes\nIncome & race/ethnicity combined (Krieger)',
+    subtitle = 'white non-Hispanic in 80th inc pctl vs. black alone in 20th inc pctl'
+  )
+
+# Plot ICE for Income and Race/Ethnicity Combined
+## white non-Hispanic in 80th income percentile vs. white non-Hispanic in 20th income percentile
+ggplot() +
+  geom_sf(
+    data = ice2020WC,
+    aes(fill = ICE_wpcinc),
+    color = 'white',
+    size = 0.05
+  ) +
+  theme_bw() +
+  scale_fill_gradient2(
+    low = '#998ec3',
+    mid = '#f7f7f7',
+    high = '#f1a340',
+    limits = c(-1, 1)
+  ) +
+  labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2006-2010 estimates') +
+  ggtitle(
+    'Index of Concentration at the Extremes\nIncome & race/ethnicity combined (Krieger)',
+    subtitle = 'white non-Hispanic (WNH) in 80th inc pctl vs. WNH in 20th inc pctl'
+  )

-
-

Compute racial/ethnic Dissimilarity Index (DI)

-

Compute the DI (Duncan) values (2006-2010 5-year ACS) for -Pennsylvania, U.S.A., counties from census tracts. This metric is based -on Duncan & Duncan -(1955) that assessed the racial/ethnic isolation of students that -identify as non-Hispanic or Latino, Black or African American alone -compared to students that identify as non-Hispanic or Latino, white -alone between schools and school districts. Multiple racial/ethnic -subgroups are available in the duncan() function, -including:

+
+

Compute racial/ethnic Dissimilarity Index (D)

+

Compute the aspatial racial/ethnic D values (2006-2010 +5-year ACS) for Pennsylvania, U.S.A., counties from census tracts. This +metric is based on Duncan +& Duncan (1955) that assessed the racial/ethnic isolation of +students that identify as non-Hispanic or Latino, Black or African +American alone compared to students that identify as non-Hispanic or +Latino, white alone between schools and school districts. Multiple +racial/ethnic subgroups are available in the duncan() +function, including:

@@ -1686,49 +1870,65 @@

Compute racial/ethnic Dissimilarity Index (DI)

-

DI is a measure of the evenness of racial/ethnic residential +

D is a measure of the evenness of racial/ethnic residential segregation when comparing smaller geographical areas to larger ones -within which the smaller geographical areas are located. The DI metric -can range in value from 0 to 1 and represents the proportion of +within which the smaller geographical areas are located. D can +range in value from 0 to 1 and represents the proportion of racial/ethnic subgroup members that would have to change their area of residence to achieve an even distribution within the larger geographical area under conditions of maximum segregation.

-
duncan2010PA <- ndi::duncan(geo_large = "county", geo_small = "tract", state = "PA",
-                            year = 2010, subgroup = "NHoLB", subgroup_ref = "NHoLW")
-
-# Obtain the 2010 census counties from the "tigris" package
-county2010PA <- tigris::counties(state = "PA", year = 2010, cb = TRUE)
-# Remove first 9 characters from GEOID for compatibility with tigris information
-county2010PA$GEOID <- substring(county2010PA$GEO_ID, 10) 
-
-# Join the DI (Duncan) values to the county geometry
-PA2010duncan <- dplyr::left_join(county2010PA, duncan2010PA$di, by = "GEOID")
-
# Visualize the DI (Duncan) values (2006-2010 5-year ACS) for Pennsylvania, U.S.A., counties 
-ggplot2::ggplot() + 
-  ggplot2::geom_sf(data = PA2010duncan, 
-                   ggplot2::aes(fill = DI),
-                   size = 0.05,
-                   color = "white") +
-   ggplot2::geom_sf(data = county2010PA,
-                    fill = "transparent", 
-                    color = "white",
-                    size = 0.2) +
-  ggplot2::theme_minimal() +
-  ggplot2::scale_fill_viridis_c(limits = c(0, 1)) +
-  ggplot2::labs(fill = "Index (Continuous)",
-                caption = "Source: U.S. Census ACS 2006-2010 estimates") +
-  ggplot2::ggtitle("Dissimilarity Index (Duncan)\nPennsylvania census tracts to counties",
-                   subtitle = "Black non-Hispanic vs. white non-Hispanic")
-

+
duncan2010PA <- duncan(
+  geo_large = 'county',
+  geo_small = 'tract',
+  state = 'PA',
+  year = 2010,
+  subgroup = 'NHoLB',
+  subgroup_ref = 'NHoLW'
+)
+
+# Obtain the 2010 census counties from the 'tigris' package
+county2010PA <- counties(state = 'PA', year = 2010, cb = TRUE)
+# Remove first 9 characters from GEOID for compatibility with tigris information
+county2010PA$GEOID <- substring(county2010PA$GEO_ID, 10) 
+
+# Join the D values to the county geometry
+PA2010duncan <- county2010PA %>%
+  left_join(duncan2010PA$d, by = 'GEOID')
+
# Visualize the D values (2006-2010 5-year ACS) for Pennsylvania, U.S.A., counties 
+ggplot() +
+  geom_sf(
+    data = PA2010duncan,
+    aes(fill = D),
+    size = 0.05,
+    color = 'white'
+  ) +
+  geom_sf(
+    data = county2010PA,
+    fill = 'transparent',
+    color = 'white',
+    size = 0.2
+  ) +
+  theme_minimal() +
+  scale_fill_viridis_c(limits = c(0, 1)) +
+  labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2006-2010 estimates') +
+  ggtitle(
+    'Dissimilarity Index (Duncan & Duncan)\nPennsylvania census tracts to counties',
+    subtitle = 'Black non-Hispanic vs. white non-Hispanic'
+  )
+

-
-

Compute Atkinson Index (AI)

-

Compute the AI (Atkinson) values (2017-2021 5-year ACS) for Kentucky, -U.S.A., counties from census block groups. This metric is based on Atkinson (1970) that assessed +

+

Compute aspatial income or racial/ethnic Atkinson Index +(A)

+

Compute the aspatial income or racial/ethnic A values +(2017-2021 5-year ACS) for Kentucky, U.S.A., counties from census block +groups. This metric is based on Atkinson (1970) that assessed the distribution of income within 12 countries but has since been adapted -to study racial/ethnic segregation (see James & Taeuber 1985). -Multiple racial/ethnic subgroups are available in the -atkinson() function, including:

+to study racial/ethnic segregation (see James & Taeuber 1985). To +compare median household income, specify +subgroup = 'MedHHInc' which will use the ACS-5 variable +‘B19013_001’ in the computation. Multiple racial/ethnic subgroups are +available in the atkinson() function, including:

@@ -1849,100 +2049,1384 @@

Compute Atkinson Index (AI)

-

To compare median household income, specify -subgroup = "MedHHInc" which will use the ACS-5 variable -“B19013_001” in the computation.

-

AI is a measure of the inequality and, in the context of residential -race/ethnicity, segregation when comparing smaller geographical areas to -larger ones within which the smaller geographical areas are located. The -AI metric can range in value from 0 to 1 and smaller values of the index -indicate lower levels of inequality (e.g., less segregation).

-

AI is sensitive to the choice of epsilon argument or the -shape parameter that determines how to weight the increments to -inequality (segregation) contributed by different proportions of the -Lorenz curve. A user must explicitly decide how heavily to weight -smaller geographical units at different points on the Lorenz curve -(i.e., whether the index should take greater account of differences -among areas of over- or under-representation). The epsilon -argument must have values between 0 and 1.0. For -0 <= epsilon < 0.5 or less “inequality-averse,” +

A is a measure of the inequality and, in the context of +residential race/ethnicity, segregation when comparing smaller +geographical areas to larger ones within which the smaller geographical +areas are located. A can range in value from 0 to 1 and smaller +values of the index indicate lower levels of inequality (e.g., less +segregation).

+

A is sensitive to the choice of epsilon +argument or the shape parameter that determines how to weight the +increments to inequality (segregation) contributed by different +proportions of the Lorenz curve. A user must explicitly decide how +heavily to weight smaller geographical units at different points on the +Lorenz curve (i.e., whether the index should take greater account of +differences among areas of over- or under-representation). The +epsilon argument must have values between 0 and 1.0. For +0 <= epsilon < 0.5 or less ‘inequality-averse,’ smaller geographical units with a subgroup proportion smaller than the subgroup proportion of the larger geographical unit contribute more to -inequality (“over-representation”). For -0.5 < epsilon <= 1.0 or more “inequality-averse,” +inequality (‘over-representation’). For +0.5 < epsilon <= 1.0 or more ‘inequality-averse,’ smaller geographical units with a subgroup proportion larger than the subgroup proportion of the larger geographical unit contribute more to -inequality (“under-representation”). If epsilon = 0.5 (the +inequality (‘under-representation’). If epsilon = 0.5 (the default), units of over- and under-representation contribute equally to -the index. See Section 2.3 of Saint-Jacques et +the index. See Section 2.3 of Saint-Jacques et al. (2020) for one method to select epsilon. We choose epsilon = 0.67 in the example below:

-
atkinson2021KY <- ndi::atkinson(geo_large = "county", geo_small = "block group", state = "KY",
-                                year = 2021, subgroup = "NHoLB", epsilon = 0.67)
-
-# Obtain the 2021 census counties from the "tigris" package
-county2021KY <- tigris::counties(state = "KY", year = 2021, cb = TRUE)
-
-# Join the AI (Atkinson) values to the county geometry
-KY2021atkinson <- dplyr::left_join(county2021KY, atkinson2021KY$ai, by = "GEOID")
-
# Visualize the AI (Atkinson) values (2017-2021 5-year ACS) for Kentucky, U.S.A., counties
-ggplot2::ggplot() + 
-  ggplot2::geom_sf(data = KY2021atkinson, 
-                   ggplot2::aes(fill = AI),
-                   size = 0.05,
-                   color = "white") +
-   ggplot2::geom_sf(data = county2021KY,
-                    fill = "transparent", 
-                    color = "white",
-                    size = 0.2) +
-  ggplot2::theme_minimal() +
-  ggplot2::scale_fill_viridis_c(limits = c(0, 1)) +
-  ggplot2::labs(fill = "Index (Continuous)",
-                caption = "Source: U.S. Census ACS 2017-2021 estimates") +
-  ggplot2::ggtitle("Atkinson Index (Atkinson)\nKentucky census block groups to counties",
-                   subtitle = expression(paste("Black non-Hispanic (", epsilon, " = 0.67)")))
+
atkinson2021KY <- atkinson(
+  geo_large = 'county',
+  geo_small = 'block group',
+  state = 'KY',
+  year = 2021,
+  subgroup = 'NHoLB',
+  epsilon = 0.67
+)
+
+# Obtain the 2021 census counties from the 'tigris' package
+county2021KY <- counties(state = 'KY', year = 2021, cb = TRUE)
+
+# Join the A values to the county geometry
+KY2021atkinson <- county2021KY %>% 
+  left_join(atkinson2021KY$a, by = 'GEOID')
+
# Visualize the A values (2017-2021 5-year ACS) for Kentucky, U.S.A., counties
+ggplot() +
+  geom_sf(
+    data = KY2021atkinson,
+    aes(fill = A),
+    size = 0.05,
+    color = 'white'
+  ) +
+  geom_sf(
+    data = county2021KY,
+    fill = 'transparent',
+    color = 'white',
+    size = 0.2
+  ) +
+  theme_minimal() +
+  scale_fill_viridis_c(limits = c(0, 1)) +
+  labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2017-2021 estimates') +
+  ggtitle(
+    'Atkinson Index (Atkinson)\nKentucky census block groups to counties',
+    subtitle = expression(paste('Black non-Hispanic (', epsilon, ' = 0.67)'))
+  )

-
sessionInfo()
-
## R version 4.2.1 (2022-06-23 ucrt)
-## Platform: x86_64-w64-mingw32/x64 (64-bit)
-## Running under: Windows 10 x64 (build 19045)
-## 
-## Matrix products: default
-## 
-## locale:
-## [1] LC_COLLATE=English_United States.utf8 
-## [2] LC_CTYPE=English_United States.utf8   
-## [3] LC_MONETARY=English_United States.utf8
-## [4] LC_NUMERIC=C                          
-## [5] LC_TIME=English_United States.utf8    
+
+
+

Compute racial/ethnic Interaction Index (xPy*)

+

Compute the aspatial racial/ethnic xPy* values (2017-2021 +5-year ACS) for Ohio, U.S.A., counties from census tracts. This metric +is based on Shevky & Williams (1949; ISBN-13:978-0-837-15637-8) and +adapted by Bell (1954). +Multiple racial/ethnic subgroups are available in the +bell() function, including:

+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ACS table sourceracial/ethnic subgroupcharacter for subgroup or subgroup_ixn +argument
B03002_002not Hispanic or LatinoNHoL
B03002_003not Hispanic or Latino, white aloneNHoLW
B03002_004not Hispanic or Latino, Black or African American aloneNHoLB
B03002_005not Hispanic or Latino, American Indian and Alaska Native aloneNHoLAIAN
B03002_006not Hispanic or Latino, Asian aloneNHoLA
B03002_007not Hispanic or Latino, Native Hawaiian and Other Pacific Islander +aloneNHoLNHOPI
B03002_008not Hispanic or Latino, some other race aloneNHoLSOR
B03002_009not Hispanic or Latino, two or more racesNHoLTOMR
B03002_010not Hispanic or Latino, two races including some other raceNHoLTRiSOR
B03002_011not Hispanic or Latino, two races excluding some other race, and +three or more racesNHoLTReSOR
B03002_012Hispanic or LatinoHoL
B03002_013Hispanic or Latino, white aloneHoLW
B03002_014Hispanic or Latino, Black or African American aloneHoLB
B03002_015Hispanic or Latino, American Indian and Alaska Native aloneHoLAIAN
B03002_016Hispanic or Latino, Asian aloneHoLA
B03002_017Hispanic or Latino, Native Hawaiian and other Pacific Islander +aloneHoLNHOPI
B03002_018Hispanic or Latino, some other race aloneHoLSOR
B03002_019Hispanic or Latino, two or more racesHoLTOMR
B03002_020Hispanic or Latino, two races including some other raceHoLTRiSOR
B03002_021Hispanic or Latino, two races excluding some other race, and three +or more racesHoLTReSOR
+

xPy* is some measure of the probability that a member of one +subgroup(s) will meet or interact with a member of another subgroup(s) +with higher values signifying higher probability of interaction (less +isolation) when comparing smaller geographical areas to larger ones +within which the smaller geographical areas are located. xPy* +can range in value from 0 to 1.

+
# Compute xPy* between the 'NHoLB' and 'NHoLW' subgroups for Ohio counties,
+# aggregating from census tracts
+bell2021OH <- bell(
+  year = 2021,
+  state = 'OH',
+  geo_small = 'tract',
+  geo_large = 'county',
+  subgroup = 'NHoLB',
+  subgroup_ixn = 'NHoLW'
+)
+
+# 2021 Ohio county boundaries from the 'tigris' package
+county2021OH <- counties(state = 'OH', year = 2021, cb = TRUE)
+
+# Attach the xPy* values to the county geometry, matching on GEOID
+OH2021bell <- left_join(county2021OH, bell2021OH$xpy_star, by = 'GEOID')
+
# Map the county-level xPy* values (2017-2021 5-year ACS) for Ohio, U.S.A.
+ggplot() +
+  # Counties shaded by their xPy* value
+  geom_sf(data = OH2021bell, aes(fill = xPy_star), color = 'white', size = 0.05) +
+  # County outlines drawn over the shaded layer
+  geom_sf(data = county2021OH, color = 'white', fill = 'transparent', size = 0.2) +
+  theme_minimal() +
+  # Fix the colour scale to the full 0-1 range of the index
+  scale_fill_viridis_c(limits = c(0, 1)) +
+  labs(
+    fill = 'Index (Continuous)',
+    caption = 'Source: U.S. Census ACS 2017-2021 estimates'
+  ) +
+  ggtitle(
+    'Interaction Index (Bell)\nOhio census tracts to counties',
+    subtitle = 'Black non-Hispanic vs. white non-Hispanic'
+  )
+

+
+
+

Compute Correlation Ratio (V)

+

Compute the aspatial racial/ethnic V values (2017-2021 +5-year ACS) for South Carolina, U.S.A., counties from census tracts. +This metric is based on Bell +(1954) and adapted by White (1986). Multiple +racial/ethnic subgroups are available in the white() +function, including:

+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ACS table sourceracial/ethnic subgroupcharacter for subgroup argument
B03002_002not Hispanic or LatinoNHoL
B03002_003not Hispanic or Latino, white aloneNHoLW
B03002_004not Hispanic or Latino, Black or African American aloneNHoLB
B03002_005not Hispanic or Latino, American Indian and Alaska Native aloneNHoLAIAN
B03002_006not Hispanic or Latino, Asian aloneNHoLA
B03002_007not Hispanic or Latino, Native Hawaiian and Other Pacific Islander +aloneNHoLNHOPI
B03002_008not Hispanic or Latino, some other race aloneNHoLSOR
B03002_009not Hispanic or Latino, two or more racesNHoLTOMR
B03002_010not Hispanic or Latino, two races including some other raceNHoLTRiSOR
B03002_011not Hispanic or Latino, two races excluding some other race, and +three or more racesNHoLTReSOR
B03002_012Hispanic or LatinoHoL
B03002_013Hispanic or Latino, white aloneHoLW
B03002_014Hispanic or Latino, Black or African American aloneHoLB
B03002_015Hispanic or Latino, American Indian and Alaska Native aloneHoLAIAN
B03002_016Hispanic or Latino, Asian aloneHoLA
B03002_017Hispanic or Latino, Native Hawaiian and other Pacific Islander +aloneHoLNHOPI
B03002_018Hispanic or Latino, some other race aloneHoLSOR
B03002_019Hispanic or Latino, two or more racesHoLTOMR
B03002_020Hispanic or Latino, two races including some other raceHoLTRiSOR
B03002_021Hispanic or Latino, two races excluding some other race, and three +or more racesHoLTReSOR
+

V removes the asymmetry from the Isolation Index by +controlling for the effect of population composition when comparing +smaller geographical areas to larger ones within which the smaller +geographical areas are located. The Isolation Index is some measure of +the probability that a member of one subgroup(s) will meet or interact +with a member of the same subgroup(s) with higher values signifying +higher probability of isolation (less interaction with other subgroups). V can range +in value from 0 to Inf.

+
# Compute V for the 'NHoLB' subgroup in South Carolina counties,
+# aggregating from census tracts
+white2021SC <- white(
+  year = 2021,
+  state = 'SC',
+  geo_small = 'tract',
+  geo_large = 'county',
+  subgroup = 'NHoLB'
+)
+
+# 2021 South Carolina county boundaries from the 'tigris' package
+county2021SC <- counties(state = 'SC', year = 2021, cb = TRUE)
+
+# Attach the V values to the county geometry, matching on GEOID
+SC2021white <- left_join(county2021SC, white2021SC$v, by = 'GEOID')
+
# Map the county-level V values (2017-2021 5-year ACS) for South Carolina, U.S.A.
+ggplot() +
+  # Counties shaded by their V value
+  geom_sf(data = SC2021white, aes(fill = V), color = 'white', size = 0.05) +
+  # County outlines drawn over the shaded layer
+  geom_sf(data = county2021SC, color = 'white', fill = 'transparent', size = 0.2) +
+  theme_minimal() +
+  # Diverging palette centred on 1
+  scale_fill_gradient2(
+    midpoint = 1,
+    low = '#998ec3',
+    mid = '#f7f7f7',
+    high = '#f1a340'
+  ) +
+  labs(
+    fill = 'Index (Continuous)',
+    caption = 'Source: U.S. Census ACS 2017-2021 estimates'
+  ) +
+  ggtitle(
+    'Correlation Ratio (White)\nSouth Carolina census tracts to counties',
+    subtitle = 'Black non-Hispanic'
+  )
+

+
+
+

Compute Location Quotient (LQ)

+

Compute the aspatial racial/ethnic LQ values (2017-2021 +5-year ACS) for Tennessee, U.S.A., counties vs. the state. This metric +is based on Merton (1939) +and adapted by Sudano et +al. (2013). Multiple racial/ethnic subgroups are available in the +sudano() function, including:

+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ACS table sourceracial/ethnic subgroupcharacter for subgroup argument
B03002_002not Hispanic or LatinoNHoL
B03002_003not Hispanic or Latino, white aloneNHoLW
B03002_004not Hispanic or Latino, Black or African American aloneNHoLB
B03002_005not Hispanic or Latino, American Indian and Alaska Native aloneNHoLAIAN
B03002_006not Hispanic or Latino, Asian aloneNHoLA
B03002_007not Hispanic or Latino, Native Hawaiian and Other Pacific Islander +aloneNHoLNHOPI
B03002_008not Hispanic or Latino, some other race aloneNHoLSOR
B03002_009not Hispanic or Latino, two or more racesNHoLTOMR
B03002_010not Hispanic or Latino, two races including some other raceNHoLTRiSOR
B03002_011not Hispanic or Latino, two races excluding some other race, and +three or more racesNHoLTReSOR
B03002_012Hispanic or LatinoHoL
B03002_013Hispanic or Latino, white aloneHoLW
B03002_014Hispanic or Latino, Black or African American aloneHoLB
B03002_015Hispanic or Latino, American Indian and Alaska Native aloneHoLAIAN
B03002_016Hispanic or Latino, Asian aloneHoLA
B03002_017Hispanic or Latino, Native Hawaiian and other Pacific Islander +aloneHoLNHOPI
B03002_018Hispanic or Latino, some other race aloneHoLSOR
B03002_019Hispanic or Latino, two or more racesHoLTOMR
B03002_020Hispanic or Latino, two races including some other raceHoLTRiSOR
B03002_021Hispanic or Latino, two races excluding some other race, and three +or more racesHoLTReSOR
+

LQ is some measure of relative racial homogeneity of each +smaller geography within a larger geography. LQ can range in +value from 0 to infinity because it is a ratio of two proportions in which +the numerator is the proportion of subgroup population in a smaller +geography and the denominator is the proportion of subgroup population +in its larger geography. For example, a smaller geography with an +LQ of 5 means that the proportion of the subgroup population +living in the smaller geography is five times the proportion of the +subgroup population in its larger geography. Unlike the previous metrics +that aggregate to the larger geography, LQ computes values for +each smaller geography relative to the larger geography.

+
# Compute LQ for the 'NHoLB' subgroup in each Tennessee county
+# relative to the state
+sudano2021TN <- sudano(
+  year = 2021,
+  state = 'TN',
+  geo_small = 'county',
+  geo_large = 'state',
+  subgroup = 'NHoLB'
+)
+
+# 2021 Tennessee county boundaries from the 'tigris' package
+county2021TN <- counties(state = 'TN', year = 2021, cb = TRUE)
+
+# Attach the LQ values to the county geometry, matching on GEOID
+TN2021sudano <- left_join(county2021TN, sudano2021TN$lq, by = 'GEOID')
+
# Map the county-level LQ values (2017-2021 5-year ACS) for Tennessee, U.S.A.
+ggplot() +
+  # Counties shaded by their LQ value
+  geom_sf(data = TN2021sudano, aes(fill = LQ), color = 'white', size = 0.05) +
+  # County outlines drawn over the shaded layer
+  geom_sf(data = county2021TN, color = 'white', fill = 'transparent', size = 0.2) +
+  theme_minimal() +
+  # No fixed limits: the scale follows the observed LQ values
+  scale_fill_viridis_c() +
+  labs(
+    fill = 'Index (Continuous)',
+    caption = 'Source: U.S. Census ACS 2017-2021 estimates'
+  ) +
+  ggtitle(
+    'Location Quotient (Sudano)\nTennessee counties vs. state',
+    subtitle = 'Black non-Hispanic'
+  )
+

+
+
+

Compute Local Exposure and Isolation (LEx/Is)

+

Compute the aspatial racial/ethnic Local Exposure and Isolation +metric (2017-2021 5-year ACS) for Mississippi, U.S.A., counties vs. the +state. This metric is based on Bemanian & +Beyer (2017). Multiple racial/ethnic subgroups are available in the +bemanian_beyer() function, including:

+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ACS table sourceracial/ethnic subgroupcharacter for subgroup or subgroup_ixn +argument
B03002_002not Hispanic or LatinoNHoL
B03002_003not Hispanic or Latino, white aloneNHoLW
B03002_004not Hispanic or Latino, Black or African American aloneNHoLB
B03002_005not Hispanic or Latino, American Indian and Alaska Native aloneNHoLAIAN
B03002_006not Hispanic or Latino, Asian aloneNHoLA
B03002_007not Hispanic or Latino, Native Hawaiian and Other Pacific Islander +aloneNHoLNHOPI
B03002_008not Hispanic or Latino, some other race aloneNHoLSOR
B03002_009not Hispanic or Latino, two or more racesNHoLTOMR
B03002_010not Hispanic or Latino, two races including some other raceNHoLTRiSOR
B03002_011not Hispanic or Latino, two races excluding some other race, and +three or more racesNHoLTReSOR
B03002_012Hispanic or LatinoHoL
B03002_013Hispanic or Latino, white aloneHoLW
B03002_014Hispanic or Latino, Black or African American aloneHoLB
B03002_015Hispanic or Latino, American Indian and Alaska Native aloneHoLAIAN
B03002_016Hispanic or Latino, Asian aloneHoLA
B03002_017Hispanic or Latino, Native Hawaiian and other Pacific Islander +aloneHoLNHOPI
B03002_018Hispanic or Latino, some other race aloneHoLSOR
B03002_019Hispanic or Latino, two or more racesHoLTOMR
B03002_020Hispanic or Latino, two races including some other raceHoLTRiSOR
B03002_021Hispanic or Latino, two races excluding some other race, and three +or more racesHoLTReSOR
+

LEx/Is is a measure of the probability that two individuals +living within a specific smaller geography (e.g., census tract) of +either different (i.e., exposure) or the same (i.e., isolation) +racial/ethnic subgroup(s) will interact, assuming that individuals +within a smaller geography are randomly mixed. LEx/Is is +standardized with a logit transformation and centered against an +expected case that all races/ethnicities are evenly distributed across a +larger geography. LEx/Is can range from negative infinity to +infinity. If LEx/Is is zero then the estimated probability of +the interaction between two people of the given subgroup(s) within a +smaller geography is equal to the expected probability if the +subgroup(s) were perfectly mixed in the larger geography. If +LEx/Is is greater than zero then the interaction is more likely +to occur within the smaller geography than in the larger geography, and +if LEx/Is is less than zero then the interaction is less likely +to occur within the smaller geography than in the larger geography. +Note: the exponentiation of each LEx/Is metric results in the +odds ratio of the specific exposure or isolation of interest in a +smaller geography relative to the larger geography. Similar to +LQ (Sudano), LEx/Is computes values for each smaller +geography relative to the larger geography.

+
# Compute LEx/Is between the 'NHoLB' and 'NHoLW' subgroups for each
+# Mississippi county relative to the state
+bemanian_beyer2021MS <- bemanian_beyer(
+  year = 2021,
+  state = 'MS',
+  geo_small = 'county',
+  geo_large = 'state',
+  subgroup = 'NHoLB',
+  subgroup_ixn = 'NHoLW'
+)
+
+# 2021 Mississippi county boundaries from the 'tigris' package
+county2021MS <- counties(state = 'MS', year = 2021, cb = TRUE)
+
+# Attach the LEx/Is values to the county geometry, matching on GEOID
+MS2021bemanian_beyer <- left_join(
+  county2021MS,
+  bemanian_beyer2021MS$lexis,
+  by = 'GEOID'
+)
+
# Map the county-level LEx/Is values (2017-2021 5-year ACS) for Mississippi, U.S.A.
+ggplot() +
+  # Counties shaded by their LEx/Is value
+  geom_sf(data = MS2021bemanian_beyer, aes(fill = LExIs), color = 'white', size = 0.05) +
+  # County outlines drawn over the shaded layer
+  geom_sf(data = county2021MS, color = 'white', fill = 'transparent', size = 0.2) +
+  theme_minimal() +
+  # Diverging palette; no midpoint is given, so the scale's default applies
+  scale_fill_gradient2(
+    high = '#f1a340',
+    mid = '#f7f7f7',
+    low = '#998ec3'
+  ) +
+  labs(
+    fill = 'Index (Continuous)',
+    caption = 'Source: U.S. Census ACS 2017-2021 estimates'
+  ) +
+  ggtitle(
+    'Local Exposure and Isolation (Bemanian & Beyer)\nMississippi counties vs. state',
+    subtitle = 'Black non-Hispanic vs. White non-Hispanic'
+  )
+

+
# Visualize the exponentiated LEx/Is values (2017-2021 5-year ACS) for
+## Mississippi, U.S.A., counties
+ggplot() +
+  # Counties shaded by exp(LExIs), i.e. the odds ratio relative to the state
+  geom_sf(
+    data = MS2021bemanian_beyer,
+    aes(fill = exp(LExIs)),
+    size = 0.05,
+    color = 'white'
+  ) +
+  # County outlines drawn over the shaded layer
+  geom_sf(
+    data = county2021MS,
+    fill = 'transparent',
+    color = 'white',
+    size = 0.2
+  ) +
+  theme_minimal() +
+  scale_fill_viridis_c() +
+  labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2017-2021 estimates') +
+  # Keep the title literal on one source line: a line break inside the quotes
+  # after '\n' would add a blank line and leading spaces to the rendered title
+  ggtitle(
+    'Odds ratio of Local Exposure and Isolation (Bemanian & Beyer)\nMississippi counties vs. state',
+    subtitle = 'Black non-Hispanic vs. White non-Hispanic'
+  )
+

+
+
+

Compute Delta (DEL)

+

Compute the aspatial racial/ethnic DEL values (2017-2021 +5-year ACS) for Alabama, U.S.A., counties from census tracts. This +metric is based on Hoover (1941) and +Duncan et al. (1961; LC:60007089). Multiple racial/ethnic subgroups are +available in the hoover() function, including:

+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ACS table sourceracial/ethnic subgroupcharacter for subgroup argument
B03002_002not Hispanic or LatinoNHoL
B03002_003not Hispanic or Latino, white aloneNHoLW
B03002_004not Hispanic or Latino, Black or African American aloneNHoLB
B03002_005not Hispanic or Latino, American Indian and Alaska Native aloneNHoLAIAN
B03002_006not Hispanic or Latino, Asian aloneNHoLA
B03002_007not Hispanic or Latino, Native Hawaiian and Other Pacific Islander +aloneNHoLNHOPI
B03002_008not Hispanic or Latino, some other race aloneNHoLSOR
B03002_009not Hispanic or Latino, two or more racesNHoLTOMR
B03002_010not Hispanic or Latino, two races including some other raceNHoLTRiSOR
B03002_011not Hispanic or Latino, two races excluding some other race, and +three or more racesNHoLTReSOR
B03002_012Hispanic or LatinoHoL
B03002_013Hispanic or Latino, white aloneHoLW
B03002_014Hispanic or Latino, Black or African American aloneHoLB
B03002_015Hispanic or Latino, American Indian and Alaska Native aloneHoLAIAN
B03002_016Hispanic or Latino, Asian aloneHoLA
B03002_017Hispanic or Latino, Native Hawaiian and other Pacific Islander +aloneHoLNHOPI
B03002_018Hispanic or Latino, some other race aloneHoLSOR
B03002_019Hispanic or Latino, two or more racesHoLTOMR
B03002_020Hispanic or Latino, two races including some other raceHoLTRiSOR
B03002_021Hispanic or Latino, two races excluding some other race, and three +or more racesHoLTReSOR
+

DEL is a measure of the proportion of members of one +subgroup(s) residing in geographic units with above average density of +members of the subgroup(s). The index provides the proportion of a +subgroup population that would have to move across geographic units to +achieve a uniform density. DEL can range in value from 0 to +1.

+
# Compute DEL for the 'NHoLB' subgroup in Alabama counties,
+# aggregating from census tracts
+hoover2021AL <- hoover(
+  year = 2021,
+  state = 'AL',
+  geo_small = 'tract',
+  geo_large = 'county',
+  subgroup = 'NHoLB'
+)
+
+# 2021 Alabama county boundaries from the 'tigris' package
+county2021AL <- counties(state = 'AL', year = 2021, cb = TRUE)
+
+# Attach the DEL values to the county geometry, matching on GEOID
+AL2021hoover <- left_join(county2021AL, hoover2021AL$del, by = 'GEOID')
+
# Map the county-level DEL values (2017-2021 5-year ACS) for Alabama, U.S.A.
+ggplot() +
+  # Counties shaded by their DEL value
+  geom_sf(data = AL2021hoover, aes(fill = DEL), color = 'white', size = 0.05) +
+  # County outlines drawn over the shaded layer
+  geom_sf(data = county2021AL, color = 'white', fill = 'transparent', size = 0.2) +
+  theme_minimal() +
+  # Fix the colour scale to the full 0-1 range of the index
+  scale_fill_viridis_c(limits = c(0, 1)) +
+  labs(
+    fill = 'Index (Continuous)',
+    caption = 'Source: U.S. Census ACS 2017-2021 estimates'
+  ) +
+  ggtitle(
+    'Delta (Hoover)\nAlabama census tracts to counties',
+    subtitle = 'Black non-Hispanic'
+  )
+

+
+
+

Compute an index of spatial proximity (SP)

+

Compute an index of spatial proximity (2010-2014 5-year ACS) for +Atlanta, GA, metropolitan area from census tracts. This metric is based +on White (1986) and Blau +(1977; ISBN-13:978-0-029-03660-0) that designed the metric to identify +racial or ethnic enclaves. Multiple racial/ethnic subgroups are +available in the white_blau() function, including:

+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ACS table sourceracial/ethnic subgroupcharacter for subgroup or subgroup_ref +arguments
B03002_002not Hispanic or LatinoNHoL
B03002_003not Hispanic or Latino, white aloneNHoLW
B03002_004not Hispanic or Latino, Black or African American aloneNHoLB
B03002_005not Hispanic or Latino, American Indian and Alaska Native aloneNHoLAIAN
B03002_006not Hispanic or Latino, Asian aloneNHoLA
B03002_007not Hispanic or Latino, Native Hawaiian and Other Pacific Islander +aloneNHoLNHOPI
B03002_008not Hispanic or Latino, some other race aloneNHoLSOR
B03002_009not Hispanic or Latino, two or more racesNHoLTOMR
B03002_010not Hispanic or Latino, two races including some other raceNHoLTRiSOR
B03002_011not Hispanic or Latino, two races excluding some other race, and +three or more racesNHoLTReSOR
B03002_012Hispanic or LatinoHoL
B03002_013Hispanic or Latino, white aloneHoLW
B03002_014Hispanic or Latino, Black or African American aloneHoLB
B03002_015Hispanic or Latino, American Indian and Alaska Native aloneHoLAIAN
B03002_016Hispanic or Latino, Asian aloneHoLA
B03002_017Hispanic or Latino, Native Hawaiian and other Pacific Islander +aloneHoLNHOPI
B03002_018Hispanic or Latino, some other race aloneHoLSOR
B03002_019Hispanic or Latino, two or more racesHoLTOMR
B03002_020Hispanic or Latino, two races including some other raceHoLTRiSOR
B03002_021Hispanic or Latino, two races excluding some other race, and three +or more racesHoLTReSOR
+

SP is a measure of clustering of racial/ethnic populations +within smaller geographical areas that are located within larger +geographical areas. SP can range in value from 0 to Inf and +represents the degree to which an area is a racial or ethnic enclave. A +value of 1 indicates there is no differential clustering between +subgroup and referent group members. A value greater than 1 indicates +subgroup members live nearer to one another than to referent subgroup +members. A value less than 1 indicates subgroup members live nearer to +referent subgroup members than to their own subgroup members.

+
# Compute SP for the 'NHoLB' subgroup relative to the 'NHoLW' referent
+# subgroup for Combined Statistical Areas (CSAs), aggregating from census
+# tracts. Several states are requested (presumably because CSAs touching
+# Georgia extend into neighbouring states -- confirm against the package docs).
+whiteblau2014GA <- white_blau(
+  geo_large = 'csa',
+  geo_small = 'tract',
+  state = c('GA', 'AL', 'TN', 'FL'),
+  year = 2014,
+  subgroup = 'NHoLB',
+  subgroup_ref = 'NHoLW'
+)
+
+# Obtain the 2014 Combined Statistical Areas from the 'tigris' package
+csa2014 <- combined_statistical_areas(year = 2014, cb = TRUE)
+# Obtain the 2014 states from the 'tigris' package
+# (pass the year explicitly so the boundaries match the 2014 CSAs and ACS data
+# instead of falling back to the 'tigris' default vintage)
+state2014 <- states(year = 2014, cb = TRUE)
+
+# Join the SP values to the CSA geometries and filter for Georgia:
+# drop empty geometries and CSAs without an SP value, keep the CSAs that
+# spatially match the Georgia state polygon, then repair invalid geometries
+GA2010whiteblau <- csa2014 %>%
+  left_join(whiteblau2014GA$sp, by = 'GEOID') %>%
+  filter(!st_is_empty(.)) %>%
+  filter(!is.na(SP)) %>%
+  st_filter(state2014 %>% filter(STUSPS == 'GA')) %>%
+  st_make_valid()
+
# Visualize the SP values (2010-2014 5-year ACS) for Georgia, U.S.A., CSAs
+ggplot() +
+  # CSAs shaded by their SP value; outline size and colour are left at the
+  # geom defaults (no dangling empty argument after aes())
+  geom_sf(
+    data = GA2010whiteblau,
+    aes(fill = SP)
+  ) +
+  # Georgia state outline drawn over the CSA layer
+  geom_sf(
+    data = state2014 %>% filter(STUSPS == 'GA'),
+    fill = 'transparent',
+    color = 'black',
+    size = 0.2
+  ) +
+  theme_minimal() +
+  # Diverging palette centred on 1, the value the text describes as
+  # "no differential clustering"
+  scale_fill_gradient2(
+    low = '#998ec3',
+    mid = '#f7f7f7',
+    high = '#f1a340',
+    midpoint = 1
+  ) +
+  labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2010-2014 estimates') +
+  ggtitle(
+    'An index of spatial proximity (White)\nCensus tracts to Combined Statistical Areas in Georgia',
+    subtitle = 'Black non-Hispanic vs. white non-Hispanic'
+  )
+

+
+
+

Compute racial/ethnic Isolation Index (xPx*)

+

Compute the aspatial racial/ethnic xPx* values (2015-2019 +5-year ACS) for Delaware, U.S.A., census tracts from census block +groups. This metric is based on Bell (1954) and adapted by +Lieberson (1981; ISBN-13:978-1-032-53884-6). Multiple racial/ethnic +subgroups are available in the lieberson() function, +including:

+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ACS table sourceracial/ethnic subgroupcharacter for subgroup argument
B03002_002not Hispanic or LatinoNHoL
B03002_003not Hispanic or Latino, white aloneNHoLW
B03002_004not Hispanic or Latino, Black or African American aloneNHoLB
B03002_005not Hispanic or Latino, American Indian and Alaska Native aloneNHoLAIAN
B03002_006not Hispanic or Latino, Asian aloneNHoLA
B03002_007not Hispanic or Latino, Native Hawaiian and Other Pacific Islander +aloneNHoLNHOPI
B03002_008not Hispanic or Latino, some other race aloneNHoLSOR
B03002_009not Hispanic or Latino, two or more racesNHoLTOMR
B03002_010not Hispanic or Latino, two races including some other raceNHoLTRiSOR
B03002_011not Hispanic or Latino, two races excluding some other race, and +three or more racesNHoLTReSOR
B03002_012Hispanic or LatinoHoL
B03002_013Hispanic or Latino, white aloneHoLW
B03002_014Hispanic or Latino, Black or African American aloneHoLB
B03002_015Hispanic or Latino, American Indian and Alaska Native aloneHoLAIAN
B03002_016Hispanic or Latino, Asian aloneHoLA
B03002_017Hispanic or Latino, Native Hawaiian and other Pacific Islander +aloneHoLNHOPI
B03002_018Hispanic or Latino, some other race aloneHoLSOR
B03002_019Hispanic or Latino, two or more racesHoLTOMR
B03002_020Hispanic or Latino, two races including some other raceHoLTRiSOR
B03002_021Hispanic or Latino, two races excluding some other race, and three +or more racesHoLTReSOR
+

xPx* is some measure of the probability that a member of one +subgroup(s) will meet or interact with a member of the same subgroup(s) +with higher values signifying higher probability of isolation (less +interaction with other subgroups) when comparing smaller geographical areas to larger ones +within which the smaller geographical areas are located. xPx* +can range in value from 0 to 1.

+
lieberson2021DE <- lieberson(
+  geo_large = 'tract',
+  geo_small = 'block group',
+  state = 'DE',
+  year = 2019,
+  subgroup = 'NHoLB'
+)
+
+# NOTE: the object names in this example carry '2021', but every call uses
+# year = 2019 (2015-2019 5-year ACS)
+# Obtain the 2019 census tracts from the 'tigris' package
+tract2021DE <- tracts(state = 'DE', year = 2019, cb = TRUE)
+
+# Join the xPx* values to the census tract geometry
+DE2021lieberson <- tract2021DE %>%
+  left_join(lieberson2021DE$xpx_star, by = 'GEOID')
+
# Map the tract-level xPx* values (2015-2019 5-year ACS) for Delaware, U.S.A.
+ggplot() +
+  # Census tracts shaded by their xPx* value
+  geom_sf(data = DE2021lieberson, aes(fill = xPx_star), color = 'white', size = 0.05) +
+  # Tract outlines drawn over the shaded layer
+  geom_sf(data = tract2021DE, color = 'white', fill = 'transparent', size = 0.2) +
+  theme_minimal() +
+  # Fix the colour scale to the full 0-1 range of the index
+  scale_fill_viridis_c(limits = c(0, 1)) +
+  labs(
+    fill = 'Index (Continuous)',
+    caption = 'Source: U.S. Census ACS 2015-2019 estimates'
+  ) +
+  ggtitle(
+    'Isolation Index (Lieberson)\nDelaware census block groups to census tracts',
+    subtitle = 'Black non-Hispanic'
+  )
+

+
sessionInfo()
+
## R version 4.4.1 (2024-06-14 ucrt)
+## Platform: x86_64-w64-mingw32/x64
+## Running under: Windows 10 x64 (build 19045)
+## 
+## Matrix products: default
+## 
+## 
+## locale:
+## [1] LC_COLLATE=English_United States.utf8 
+## [2] LC_CTYPE=English_United States.utf8   
+## [3] LC_MONETARY=English_United States.utf8
+## [4] LC_NUMERIC=C                          
+## [5] LC_TIME=English_United States.utf8    
+## 
+## time zone: America/New_York
+## tzcode source: internal
 ## 
 ## attached base packages:
 ## [1] stats     graphics  grDevices utils     datasets  methods   base     
 ## 
 ## other attached packages:
-## [1] tigris_2.0       tidycensus_1.2.3 ndi_0.1.4.9000   ggplot2_3.4.0   
-## [5] dplyr_1.0.10     knitr_1.41      
+## [1] tigris_2.1       tidycensus_1.6.5 sf_1.0-16        ndi_0.1.6.9002  
+## [5] ggplot2_3.5.1    dplyr_1.1.4      knitr_1.48      
 ## 
 ## loaded via a namespace (and not attached):
-##  [1] Rcpp_1.0.9         lattice_0.20-45    tidyr_1.2.1        class_7.3-20      
-##  [5] assertthat_0.2.1   digest_0.6.30      psych_2.2.9        utf8_1.2.2        
-##  [9] R6_2.5.1           evaluate_0.18      e1071_1.7-12       highr_0.9         
-## [13] httr_1.4.4         pillar_1.8.1       rlang_1.0.6        curl_4.3.3        
-## [17] uuid_1.1-0         rstudioapi_0.14    jquerylib_0.1.4    Matrix_1.4-1      
-## [21] rmarkdown_2.18     labeling_0.4.2     readr_2.1.3        stringr_1.5.0     
-## [25] munsell_0.5.0      proxy_0.4-27       compiler_4.2.1     xfun_0.35         
-## [29] pkgconfig_2.0.3    mnormt_2.1.1       htmltools_0.5.4    tidyselect_1.2.0  
-## [33] tibble_3.1.8       viridisLite_0.4.1  fansi_1.0.3        tzdb_0.3.0        
-## [37] crayon_1.5.2       withr_2.5.0        sf_1.0-9           wk_0.7.0          
-## [41] MASS_7.3-57        rappdirs_0.3.3     grid_4.2.1         nlme_3.1-157      
-## [45] jsonlite_1.8.4     gtable_0.3.1       lifecycle_1.0.3    DBI_1.1.3         
-## [49] magrittr_2.0.3     units_0.8-0        scales_1.2.1       KernSmooth_2.23-20
-## [53] cli_3.4.1          stringi_1.7.8      cachem_1.0.6       farver_2.1.1      
-## [57] xml2_1.3.3         bslib_0.4.1        ellipsis_0.3.2     generics_0.1.3    
-## [61] vctrs_0.5.1        s2_1.1.1           tools_4.2.1        Cairo_1.6-0       
-## [65] glue_1.6.2         purrr_0.3.5        hms_1.1.2          parallel_4.2.1    
-## [69] fastmap_1.1.0      yaml_2.3.6         colorspace_2.0-3   classInt_0.4-8    
-## [73] rvest_1.0.3        sass_0.4.4
+## [1] gtable_0.3.5 xfun_0.47 bslib_0.8.0 psych_2.4.6.26 +## [5] lattice_0.22-6 tzdb_0.4.0 Cairo_1.6-2 vctrs_0.6.5 +## [9] tools_4.4.1 generics_0.1.3 curl_5.2.1 parallel_4.4.1 +## [13] tibble_3.2.1 proxy_0.4-27 fansi_1.0.6 highr_0.11 +## [17] pkgconfig_2.0.3 Matrix_1.7-0 KernSmooth_2.23-24 uuid_1.2-1 +## [21] lifecycle_1.0.4 farver_2.1.2 compiler_4.4.1 stringr_1.5.1 +## [25] munsell_0.5.1 mnormt_2.1.1 carData_3.0-5 htmltools_0.5.8.1 +## [29] class_7.3-22 sass_0.4.9 yaml_2.3.10 pillar_1.9.0 +## [33] car_3.1-2 crayon_1.5.3 jquerylib_0.1.4 tidyr_1.3.1 +## [37] MASS_7.3-61 classInt_0.4-10 cachem_1.1.0 wk_0.9.2 +## [41] abind_1.4-5 nlme_3.1-166 tidyselect_1.2.1 rvest_1.0.4 +## [45] digest_0.6.36 stringi_1.8.4 purrr_1.0.2 labeling_0.4.3 +## [49] fastmap_1.2.0 grid_4.4.1 colorspace_2.1-1 cli_3.6.3 +## [53] magrittr_2.0.3 utf8_1.2.4 e1071_1.7-14 readr_2.1.5 +## [57] withr_3.0.1 scales_1.3.0 rappdirs_0.3.3 rmarkdown_2.28 +## [61] httr_1.4.7 hms_1.1.3 evaluate_0.24.0 viridisLite_0.4.2 +## [65] s2_1.1.7 rlang_1.1.4 Rcpp_1.0.13 glue_1.7.0 +## [69] DBI_1.2.3 xml2_1.3.6 rstudioapi_0.16.0 jsonlite_1.8.8 +## [73] R6_2.5.1 units_0.8-5