diff --git a/DESCRIPTION b/DESCRIPTION index 6ddbb45..7fab9ce 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: ndi Title: Neighborhood Deprivation Indices -Version: 0.1.6.9010 -Date: 2024-08-30 +Version: 0.1.6.9011 +Date: 2024-08-31 Authors@R: c(person(given = "Ian D.", family = "Buller", diff --git a/NAMESPACE b/NAMESPACE index b72273c..f102334 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,6 +15,7 @@ export(james_taeuber) export(krieger) export(lieberson) export(massey) +export(massey_duncan) export(messer) export(powell_wiley) export(sudano) diff --git a/NEWS.md b/NEWS.md index 60736b4..98d562d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,6 @@ # ndi (development version) -## ndi v0.1.6.9010 +## ndi v0.1.6.9011 ### New Features @@ -15,9 +15,10 @@ * Added `denton()` function to compute the aspatial racial or ethnic Relative Clustering (*RCL*) based on [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281) * Added `duncan_duncan()` function to compute the aspatial racial or ethnic Relative Centralization (*RCE*) based on [Duncan & Duncan (1955b)](https://doi.org/10.1086/221609) and [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281) * Added `massey()` function to compute the aspatial racial or ethnic Absolute Clustering (*ACL*) based on [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281) +* Added `massey_duncan()` function to compute the aspatial racial or ethnic Absolute Concentration (*ACO*) based on [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281) and Duncan, Cuzzort, & Duncan (1961; LC:60007089) #### New Function Capabilities -* Added `geo_large = 'place'` for census-designated places, `geo_large = 'cbsa'` for core-based statistical areas, `geo_large = 'csa'` for combined statistical areas, and `geo_large = 'metro'` for metropolitan divisions as the larger geographical unit in `atkinson()`, `bell()`, `bemanian_beyer()`, `denton()`, `duncan()`, `duncan_cuzzort()`, `duncan_duncan()`, `hoover()`, `james_taeuber()`, `lieberson()`, `sudano()`, `theil()`, and `white()`, `white_blau()` functions. +* Added `geo_large = 'place'` for census-designated places, `geo_large = 'cbsa'` for core-based statistical areas, `geo_large = 'csa'` for combined statistical areas, and `geo_large = 'metro'` for metropolitan divisions as the larger geographical unit in `atkinson()`, `bell()`, `bemanian_beyer()`, `denton()`, `duncan()`, `duncan_cuzzort()`, `duncan_duncan()`, `hoover()`, `james_taeuber()`, `lieberson()`, `massey()`, `massey_duncan()`, `sudano()`, `theil()`, and `white()`, `white_blau()` functions. * Added census block group computation for `anthopolos()` by specifying `geo == 'cbg'` or `geo == 'block group'` * Added `holder` argument to `atkinson()` function to toggle the computation with or without the Hölder mean. The function can now compute *A* without the Hölder mean. The default is `holder = FALSE`. * Added `crs` argument to `anthopolos()`, `bravo()`, and `white_blau()` functions to provide spatial projection of the distance-based metrics @@ -43,7 +44,7 @@ * Re-formatted code and documentation throughout for consistent readability * Renamed 'race/ethnicity' or 'racial/ethnic' to 'race or ethnicity' or 'racial or ethnic' throughout documentation to use more modern, inclusive, and appropriate language * Updated documentation about value range of *V* (White) from `{0 to 1}` to `{-Inf to Inf}` -* Added examples for `atkinson()`, `duncan_cuzzort()`, `duncan_duncan()`, `gini()`, `hoover()`, `james_taeuber()`, `lieberson()`, `theil()`, and `white_blau()` functions in vignettes and README +* Added examples for `atkinson()`, `duncan_cuzzort()`, `duncan_duncan()`, `gini()`, `hoover()`, `james_taeuber()`, `lieberson()`, `massey()`, `massey_duncan()`, `theil()`, and `white_blau()` functions in vignettes and README * Added example for `holder` argument in `atkinson()` function in README * Reordered the contents of 'ndi-package.R' thematically * Reordered the README examples alphabetically diff --git a/R/globals.R b/R/globals.R index cb8eed7..520fa99 100644 --- a/R/globals.R +++ b/R/globals.R @@ -271,6 +271,12 @@ globalVariables( 'crs', 'RCE', 'ACL', - 'RCL' + 'RCL', + 'ACO', + 'ALAND', + 'TotalPopE', + 'n_1', + 'n_2', + 't_cs' ) ) diff --git a/R/massey_duncan.R b/R/massey_duncan.R new file mode 100644 index 0000000..85c463d --- /dev/null +++ b/R/massey_duncan.R @@ -0,0 +1,414 @@ +#' Absolute Concentration based on Massey & Denton (1988) and Duncan, Cuzzort, & Duncan (1961) +#' +#' Compute the aspatial Absolute Concentration (Massey & Denton) of a selected racial or ethnic subgroup(s) and U.S. geographies. +#' +#' @param geo_large Character string specifying the larger geographical unit of the data. The default is counties \code{geo_large = 'county'}. +#' @param geo_small Character string specifying the smaller geographical unit of the data. The default is census tracts \code{geo_small = 'tract'}. +#' @param year Numeric. The year to compute the estimate. The default is 2020, and the years 2009 onward are currently available. +#' @param subgroup Character string specifying the racial or ethnic subgroup(s) as the comparison population. See Details for available choices. +#' @param omit_NAs Logical. If FALSE, will compute index for a larger geographical unit only if all of its smaller geographical units have values. The default is TRUE. +#' @param quiet Logical. If TRUE, will display messages about potential missing census information. The default is FALSE. +#' @param ... Arguments passed to \code{\link[tidycensus]{get_acs}} to select state, county, and other arguments for census characteristics +#' +#' @details This function will compute the aspatial Absolute Concentration (\emph{ACO}) of selected racial or ethnic subgroups and U.S. geographies for a specified geographical extent (e.g., the entire U.S. or a single state) based on Massey & Denton (1988) \doi{10.1093/sf/67.2.281} and Duncan, Cuzzort, & Duncan (1961; LC:60007089). This function provides the computation of \emph{ACO} for any of the U.S. Census Bureau race or ethnicity subgroups (including Hispanic and non-Hispanic individuals). +#' +#' The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the computation. The yearly estimates are available for 2009 onward when ACS-5 data are available (2010 onward for \code{geo_large = 'cbsa'} and 2011 onward for \code{geo_large = 'place'}, \code{geo_large = 'csa'}, or \code{geo_large = 'metro'}) but may be available from other U.S. Census Bureau surveys. The twenty racial or ethnic subgroups (U.S. Census Bureau definitions) are: +#' \itemize{ +#' \item \strong{B03002_002}: not Hispanic or Latino \code{'NHoL'} +#' \item \strong{B03002_003}: not Hispanic or Latino, white alone \code{'NHoLW'} +#' \item \strong{B03002_004}: not Hispanic or Latino, Black or African American alone \code{'NHoLB'} +#' \item \strong{B03002_005}: not Hispanic or Latino, American Indian and Alaska Native alone \code{'NHoLAIAN'} +#' \item \strong{B03002_006}: not Hispanic or Latino, Asian alone \code{'NHoLA'} +#' \item \strong{B03002_007}: not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'NHoLNHOPI'} +#' \item \strong{B03002_008}: not Hispanic or Latino, Some other race alone \code{'NHoLSOR'} +#' \item \strong{B03002_009}: not Hispanic or Latino, Two or more races \code{'NHoLTOMR'} +#' \item \strong{B03002_010}: not Hispanic or Latino, Two races including Some other race \code{'NHoLTRiSOR'} +#' \item \strong{B03002_011}: not Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'NHoLTReSOR'} +#' \item \strong{B03002_012}: Hispanic or Latino \code{'HoL'} +#' \item \strong{B03002_013}: Hispanic or Latino, white alone \code{'HoLW'} +#' \item \strong{B03002_014}: Hispanic or Latino, Black or African American alone \code{'HoLB'} +#' \item \strong{B03002_015}: Hispanic or Latino, American Indian and Alaska Native alone \code{'HoLAIAN'} +#' \item \strong{B03002_016}: Hispanic or Latino, Asian alone \code{'HoLA'} +#' \item \strong{B03002_017}: Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'HoLNHOPI'} +#' \item \strong{B03002_018}: Hispanic or Latino, Some other race alone \code{'HoLSOR'} +#' \item \strong{B03002_019}: Hispanic or Latino, Two or more races \code{'HoLTOMR'} +#' \item \strong{B03002_020}: Hispanic or Latino, Two races including Some other race \code{'HoLTRiSOR'} +#' \item \strong{B03002_021}: Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'HoLTReSOR'} +#' } +#' +#' Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. +#' +#' \emph{ACO} is a measure of concentration of racial or ethnic populations within smaller geographical units that are located within larger geographical units. \emph{ACO} can range from 0 to 1 and represents the relative amount of physical space occupied by a racial or ethnic subgroup in a larger geographical unit. A value of 1 indicates that a racial or ethnic subgroup has achieved the maximum spatial concentration possible (all racial or ethnic subgroup members live in the smallest of the smaller geographical units). A value of 0 indicates the maximum deconcentration possible (all racial or ethnic subgroup members live in the largest of the smaller geographical units). +#' +#' Larger geographical units available include states \code{geo_large = 'state'}, counties \code{geo_large = 'county'}, census tracts \code{geo_large = 'tract'}, census-designated places \code{geo_large = 'place'}, core-based statistical areas \code{geo_large = 'cbsa'}, combined statistical areas \code{geo_large = 'csa'}, and metropolitan divisions \code{geo_large = 'metro'}. Smaller geographical units available include, counties \code{geo_small = 'county'}, census tracts \code{geo_small = 'tract'}, and census block groups \code{geo_small = 'cbg'}. If a larger geographical unit is comprised of only one smaller geographical unit (e.g., a U.S county contains only one census tract), then the \emph{ACO} value returned is NA. If the larger geographical unit is census-designated places \code{geo_large = 'place'}, core-based statistical areas \code{geo_large = 'cbsa'}, combined statistical areas \code{geo_large = 'csa'}, or metropolitan divisions \code{geo_large = 'metro'}, only the smaller geographical units completely within a larger geographical unit are considered in the \emph{V} computation (see internal \code{\link[sf]{st_within}} function for more information) and recommend specifying all states within which the interested larger geographical unit are located using the internal \code{state} argument to ensure all appropriate smaller geographical units are included in the \emph{ACO} computation. +#' +#' @return An object of class 'list'. This is a named list with the following components: +#' +#' \describe{ +#' \item{\code{aco}}{An object of class 'tbl' for the GEOID, name, and \emph{ACO} at specified larger census geographies.} +#' \item{\code{aco_data}}{An object of class 'tbl' for the raw census values at specified smaller census geographies.} +#' \item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute \emph{ACO}.} +#' } +#' +#' @import dplyr +#' @importFrom sf st_centroid st_distance st_drop_geometry st_transform st_within +#' @importFrom stats complete.cases +#' @importFrom stringr str_trim +#' @importFrom tidycensus get_acs +#' @importFrom tidyr pivot_longer separate +#' @importFrom tigris combined_statistical_areas core_based_statistical_areas metro_divisions places +#' @importFrom units drop_units set_units +#' @importFrom utils stack +#' @export +#' +#' @seealso \code{\link[tidycensus]{get_acs}} for additional arguments for geographic extent selection (i.e., \code{state} and \code{county}). +#' +#' @examples +#' \dontrun{ +#' # Wrapped in \dontrun{} because these examples require a Census API key. +#' +#' # Index of spatial proximity of Black populations +#' ## of census tracts within counties within Georgia, U.S.A., counties (2020) +#' massey_duncan( +#' geo_large = 'county', +#' geo_small = 'tract', +#' state = 'GA', +#' year = 2020, +#' subgroup = c('NHoLB', 'HoLB') +#' ) +#' +#' } +#' +massey_duncan <- function(geo_large = 'county', + geo_small = 'tract', + year = 2020, + subgroup, + omit_NAs = TRUE, + quiet = FALSE, + ...) { + + # Check arguments + match.arg(geo_large, choices = c('state', 'county', 'tract', 'place', 'cbsa', 'csa', 'metro')) + match.arg(geo_small, choices = c('county', 'tract', 'cbg', 'block group')) + stopifnot(is.numeric(year), year >= 2009) # all variables available 2009 onward + match.arg( + subgroup, + several.ok = TRUE, + choices = c( + 'NHoL', + 'NHoLW', + 'NHoLB', + 'NHoLAIAN', + 'NHoLA', + 'NHoLNHOPI', + 'NHoLSOR', + 'NHoLTOMR', + 'NHoLTRiSOR', + 'NHoLTReSOR', + 'HoL', + 'HoLW', + 'HoLB', + 'HoLAIAN', + 'HoLA', + 'HoLNHOPI', + 'HoLSOR', + 'HoLTOMR', + 'HoLTRiSOR', + 'HoLTReSOR' + ) + ) + + # Select census variables + vars <- c( + TotalPop = 'B03002_001', + NHoL = 'B03002_002', + NHoLW = 'B03002_003', + NHoLB = 'B03002_004', + NHoLAIAN = 'B03002_005', + NHoLA = 'B03002_006', + NHoLNHOPI = 'B03002_007', + NHoLSOR = 'B03002_008', + NHoLTOMR = 'B03002_009', + NHoLTRiSOR = 'B03002_010', + NHoLTReSOR = 'B03002_011', + HoL = 'B03002_012', + HoLW = 'B03002_013', + HoLB = 'B03002_014', + HoLAIAN = 'B03002_015', + HoLA = 'B03002_016', + HoLNHOPI = 'B03002_017', + HoLSOR = 'B03002_018', + HoLTOMR = 'B03002_019', + HoLTRiSOR = 'B03002_020', + HoLTReSOR = 'B03002_021' + ) + + selected_vars <- vars[c('TotalPop', subgroup)] + out_names <- names(selected_vars) # save for output + in_subgroup <- paste0(subgroup, 'E') + + # Acquire ACO variables and sf geometries + out_dat <- suppressMessages(suppressWarnings( + tidycensus::get_acs( + geography = geo_small, + year = year, + output = 'wide', + variables = selected_vars, + geometry = TRUE, + keep_geo_vars = TRUE, + ... + ) + )) + + # Format output + if (geo_small == 'county') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('county', 'state'), sep = ',') + } + if (geo_small == 'tract') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('tract', 'county', 'state'), sep = ',') %>% + dplyr::mutate(tract = gsub('[^0-9\\.]', '', tract)) + } + if (geo_small == 'cbg' | geo_small == 'block group') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('cbg', 'tract', 'county', 'state'), sep = ',') %>% + dplyr::mutate( + tract = gsub('[^0-9\\.]', '', tract), + cbg = gsub('[^0-9\\.]', '', cbg) + ) + } + + # Grouping IDs for ACO computation + if (geo_large == 'state') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = STATEFP, + state = stringr::str_trim(state) + ) + } + if (geo_large == 'tract') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = paste0(STATEFP, COUNTYFP, TRACTCE), + state = stringr::str_trim(state), + county = stringr::str_trim(county) + ) + } + if (geo_large == 'county') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = paste0(STATEFP, COUNTYFP), + state = stringr::str_trim(state), + county = stringr::str_trim(county) + ) + } + if (geo_large == 'place') { + stopifnot(is.numeric(year), year >= 2011) # Places only available 2011 onward + lgeom <- suppressMessages(suppressWarnings(tigris::places( + year = year, state = unique(out_dat$state)) + )) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 4] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + place = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 5] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) + } + if (geo_large == 'cbsa') { + stopifnot(is.numeric(year), year >= 2010) # CBSAs only available 2010 onward + lgeom <- suppressMessages(suppressWarnings(tigris::core_based_statistical_areas(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 3] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + cbsa = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 4] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) + } + if (geo_large == 'csa') { + stopifnot(is.numeric(year), year >= 2011) # CSAs only available 2011 onward + lgeom <- suppressMessages(suppressWarnings(tigris::combined_statistical_areas(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 2] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + csa = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 3] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) + } + if (geo_large == 'metro') { + stopifnot(is.numeric(year), year >= 2011) # Metropolitan Divisions only available 2011 onward + lgeom <- suppressMessages(suppressWarnings(tigris::metro_divisions(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 4] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + metro = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 5] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) + } + + # Count of racial or ethnic subgroup populations + ## Count of racial or ethnic comparison subgroup population + if (length(in_subgroup) == 1) { + out_dat <- out_dat %>% + dplyr::mutate(subgroup = as.data.frame(.)[, in_subgroup]) + } else { + out_dat <- out_dat %>% + dplyr::mutate(subgroup = rowSums(as.data.frame(.)[, in_subgroup])) + } + + # Compute ACO + ## From Denton & Massey (1988) https://doi.org/10.1093/sf/67.2.281 + ## ACO = 1-\frac{\sum_{i=1}^{n}\frac{x_{i}a_{i}}{X}-\sum_{i=1}^{n_{1}}\frac{t_{i}a_{i}}{T_{1}}} + ## {\sum_{i=n^{2}}^{n}\frac{t_{i}a_{i}}{T_{2}}-\sum_{i=1}^{n_{1}}\frac{t_{i}a_{i}}{T_{1}}} + ## Where for i smaller geographical units are ordered by geographic size from smallest to largest + ## a_{i} denotes the land area of smaller geographical unit i + ## x_{i} denotes the racial or ethnic subgroup population of smaller geographical unit i + ## X denotes the racial or ethnic subgroup population of a larger geographical unit + ## n_{1} denotes the rank of the smaller geographic unit where the cumulative total population of + ## smaller geographical units equals the total racial or ethnic subgroup population of a + ## larger geographical unit, summing from the smallest unit up + ## n_{2} denotes the rank of the smaller geographic unit where the cumulative total population of + ## smaller geographical units equals a total racial or ethnic subgroup population + ## totaling from the largest unit down + ## t_{i} denotes the total population of smaller geographical unit i + ## T_{1} denotes the total population of smaller geographical units from 1 to n_{1} + ## T_{2} denotes the total population of smaller geographical units from n_{2} to n + + ## Compute + out_tmp <- out_dat %>% + .[.$oid != 'NANA', ] %>% + split(., f = list(.$oid)) %>% + lapply(., FUN = aco_fun, omit_NAs = omit_NAs) %>% + utils::stack(.) %>% + dplyr::mutate( + ACO = values, + oid = ind + ) %>% + dplyr::select(ACO, oid) %>% + sf::st_drop_geometry() + + # Warning for missingness of census characteristics + missingYN <- out_dat[, c('TotalPopE', in_subgroup)] %>% + sf::st_drop_geometry() + names(missingYN) <- out_names + missingYN <- missingYN %>% + tidyr::pivot_longer( + cols = dplyr::everything(), + names_to = 'variable', + values_to = 'val' + ) %>% + dplyr::group_by(variable) %>% + dplyr::summarise( + total = dplyr::n(), + n_missing = sum(is.na(val)), + percent_missing = paste0(round(mean(is.na(val)) * 100, 2), ' %') + ) + + if (quiet == FALSE) { + # Warning for missing census data + if (sum(missingYN$n_missing) > 0) { + message('Warning: Missing census data') + } + } + + # Format output + out <- out_dat %>% + sf::st_drop_geometry() %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) + if (geo_large == 'state') { + out <- out %>% + dplyr::select(oid, state, ACO) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, state, ACO) + } + if (geo_large == 'county') { + out <- out %>% + dplyr::select(oid, state, county, ACO) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, state, county, ACO) + } + if (geo_large == 'tract') { + out <- out %>% + dplyr::select(oid, state, county, tract, ACO) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, state, county, tract, ACO) + } + if (geo_large == 'place') { + out <- out %>% + dplyr::select(oid, place, ACO) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, place, ACO) + } + if (geo_large == 'cbsa') { + out <- out %>% + dplyr::select(oid, cbsa, ACO) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, cbsa, ACO) + } + if (geo_large == 'csa') { + out <- out %>% + dplyr::select(oid, csa, ACO) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, csa, ACO) + } + if (geo_large == 'metro') { + out <- out %>% + dplyr::select(oid, metro, ACO) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, metro, ACO) + } + + out <- out %>% + .[.$GEOID != 'NANA', ] %>% + dplyr::filter(!is.na(GEOID)) %>% + dplyr::distinct(GEOID, .keep_all = TRUE) %>% + dplyr::arrange(GEOID) %>% + dplyr::as_tibble() + + out_dat <- out_dat %>% + dplyr::arrange(GEOID) %>% + dplyr::as_tibble() + + out <- list(aco = out, aco_data = out_dat, missing = missingYN) + + return(out) +} diff --git a/R/ndi-package.R b/R/ndi-package.R index 6f94efc..d41a11a 100644 --- a/R/ndi-package.R +++ b/R/ndi-package.R @@ -36,11 +36,11 @@ #' #' \code{\link{james_taeuber}} Computes the aspatial Dissimilarity Index (\emph{D}) based on James & Taeuber (1985) \doi{10.2307/270845}. #' -#' \code{\link{krieger}} Computes the aspatial Index of Concentration at the Extremes based on Feldman et al. (2015) \doi{10.1136/jech-2015-205728} and Krieger et al. (2016) \doi{10.2105/AJPH.2015.302955}. -#' #' \code{\link{lieberson}} Computes the aspatial Isolation Index (\emph{xPx\*}) based on Lieberson (1981; ISBN-13:978-1-032-53884-6) and Bell (1954) \doi{10.2307/2574118}. #' #' \code{\link{massey}} Computes the aspatial Absolute Clustering (\emph{ACL}) based on Massey & Denton (1988) \doi{10.1093/sf/67.2.281}. +#' +#' \code{\link{massey_duncan}} Computes the aspatial Absolute Concentration (\emph{ACO}) based on Massey & Denton (1988) \doi{10.1093/sf/67.2.281} and Duncan, Cuzzort, & Duncan (1961; LC:60007089). #' #' \code{\link{sudano}} Computes the aspatial Location Quotient (\emph{LQ}) based on Merton (1939) \doi{10.2307/2084686} and Sudano et al. (2013) \doi{10.1016/j.healthplace.2012.09.015}. #' @@ -58,6 +58,8 @@ #' #' \code{\link{gini}} Also retrieves the aspatial Gini Index (\emph{G}) of income inequality based on Gini (1921) \doi{10.2307/2223319}. #' +#' \code{\link{krieger}} Computes the aspatial Index of Concentration at the Extremes based on Feldman et al. (2015) \doi{10.1136/jech-2015-205728} and Krieger et al. (2016) \doi{10.2105/AJPH.2015.302955}. +#' #' \strong{Pre-formatted U.S. Census Data} #' #' \code{\link{DCtracts2020}} A sample dataset containing information about U.S. Census American Community Survey 5-year estimate data for the District of Columbia census tracts (2020). The data are obtained from the \code{\link[tidycensus]{get_acs}} function and formatted for the \code{\link{messer}} and \code{\link{powell_wiley}} functions input. diff --git a/R/utils.R b/R/utils.R index 6d538ad..ff320b6 100644 --- a/R/utils.R +++ b/R/utils.R @@ -397,3 +397,73 @@ rcl_fun <- function(x, crs, omit_NAs) { return(RCL) } } + +# Internal function for Absolute Concentration +## From Denton & Massey (1988) https://doi.org/10.1093/sf/67.2.281 +## Returns NA value if only one smaller geography in a larger geography +aco_fun <- function(x, omit_NAs) { + # x <- out_tmp[[12]] + xx <- x[ , c('TotalPopE', 'subgroup', 'ALAND')] + if (omit_NAs == TRUE) { xx <- xx[stats::complete.cases(sf::st_drop_geometry(xx)), ] } + if (nrow(sf::st_drop_geometry(x)) < 2 || any(sf::st_drop_geometry(xx) < 0) || any(is.na(sf::st_drop_geometry(xx)))) { + NA + } else { + a_i <- xx$ALAND + x_i <- xx$subgroup + X <- sum(x_i, na.rm = TRUE) + xx_tmp <- xx %>% + sf::st_drop_geometry(xx) %>% + dplyr::arrange(ALAND) %>% + dplyr::mutate( + t_cs = cumsum(TotalPopE), + n_1 = t_cs <= X, + ) + if (!(TRUE %in% xx_tmp$n_1)) { + xx_1 <- xx_tmp %>% + dplyr::slice(1) + } else { + xx_1 <- xx_tmp %>% + dplyr::filter(n_1 == TRUE) + } + if (nrow(xx_1) == 1 & 0 %in% xx_1$TotalPopE) { + xx_1 <- xx_tmp %>% + dplyr::filter(TotalPopE > 0) %>% + dplyr::slice(1) + } + T_1 <- xx_1 %>% + dplyr::summarise( + T_1 = sum(TotalPopE) + ) %>% + unlist() + xx_tmp <- xx %>% + sf::st_drop_geometry(xx) %>% + dplyr::arrange(-ALAND) %>% + dplyr::mutate( + t_cs = cumsum(TotalPopE), + n_2 = t_cs <= X, + ) + if (!(TRUE %in% xx_tmp$n_2)) { + xx_2 <- xx_tmp %>% + dplyr::slice(1) + } else { + xx_2 <- xx_tmp %>% + dplyr::filter(n_2 == TRUE) + } + if (nrow(xx_2) == 1 & 0 %in% xx_2$TotalPopE) { + xx_2 <- xx_tmp %>% + dplyr::filter(TotalPopE > 0) %>% + dplyr::slice(1) + } + T_2 <- xx_2 %>% + dplyr::summarise( + T_2 = sum(TotalPopE) + ) %>% + unlist() + num <- sum((x_i * a_i) / X, na.rm = TRUE) - sum((xx_1$TotalPopE * xx_1$ALAND) / T_1, na.rm = TRUE) + denom <- sum((xx_2$TotalPopE * xx_2$ALAND) / T_2, na.rm = TRUE) - sum((xx_1$TotalPopE * xx_1$ALAND) / T_1, na.rm = TRUE) + ACO_tmp <- (num / denom) + if (is.infinite(ACO_tmp) | is.na(ACO_tmp)) { ACO_tmp <- 0 } + ACO <- 1 - ACO_tmp + return(ACO) + } +} diff --git a/README.md b/README.md index 49b97b0..17c2a1a 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ [![DOI](https://zenodo.org/badge/521439746.svg)](https://zenodo.org/badge/latestdoi/521439746) -**Date repository last updated**: 2024-08-30 +**Date repository last updated**: 2024-08-31 ### Overview @@ -103,6 +103,10 @@ To install the development version from GitHub: Compute the aspatial racial or ethnic Absolute Clustering (ACL) based on Massey & Denton (1988) +massey_duncan +Compute the aspatial racial or ethnic Absolute Concentration (ACO) based on Massey & Denton (1988) and Duncan, Cuzzort, & Duncan (1961; LC:60007089) + + messer Compute the aspatial Neighborhood Deprivation Index (NDI) based on Messer et al. (2006) @@ -1368,6 +1372,51 @@ ggplot() + ![](man/figures/acl.png) +```r +# --------------------------------------------------------- # +# Compute aspatial Absolute Concentration (Massey & Denton) # +# --------------------------------------------------------- # + +# Absolute Concentration based on Massey & Denton (1988) and Duncan, Cuzzort, & Duncan (1961) +## Selected subgroup: Not Hispanic or Latino, Black or African American alone +## Selected large geography: census tract +## Selected small geography: census block group +ACO_2020_DC <- massey_duncan( + geo_large = 'tract', + geo_small = 'cbg', + state = 'DC', + year = 2020, + subgroup = 'NHoLB' +) + +# Obtain the 2020 census tracts from the 'tigris' package +tract_2020_DC <- tracts(state = 'DC', year = 2020, cb = TRUE) + +# Join the ACO (Massey & Denton) values to the census tract geometry +ACO_2020_DC <- tract_2020_DC %>% + left_join(ACO_2020_DC$aco, by = 'GEOID') + +ggplot() + + geom_sf( + data = ACO_2020_DC, + aes(fill = ACO), + color = 'white' + ) + + theme_bw() + + scale_fill_viridis_c(limits = c(0, 1)) + + labs( + fill = 'Index (Continuous)', + caption = 'Source: U.S. Census ACS 2016-2020 estimates' + ) + + ggtitle( + 'Absolute Concentration (Massey & Denton)\n + Washington, D.C. census block groups to tracts', + subtitle = 'Black non-Hispanic' + ) +``` + +![](man/figures/aco.png) + ```r # ------------------------------------------------------------ # # Compute aspatial racial or ethnic Location Quotient (Sudano) # diff --git a/cran-comments.md b/cran-comments.md index 761da70..4957cf6 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -8,9 +8,10 @@ * Added `james_taeuber()` function to compute the aspatial racial or ethnic Dissimilarity Index (*D*) based on [James & Taeuber (1985)](https://doi.org/10.2307/270845) * Added `lieberson()` function to compute the aspatial racial or ethnic Isolation Index (_xPx\*_) based on Lieberson (1981; ISBN-13:978-1-032-53884-6) and and [Bell (1954)](https://doi.org/10.2307/2574118) * Added `massey()` function to compute the aspatial racial or ethnic Absolute Clustering (*ACL*) based on [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281) + * Added `massey_duncan()` function to compute the aspatial racial or ethnic Absolute Concentration (*ACO*) based on [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281) and Duncan, Cuzzort, & Duncan (1961; LC:60007089) * Added `theil()` function the aspatial racial or ethnic Entropy (*H*) based on Theil (1972; ISBN:978-0-444-10378-9) and [Theil & Finizza (1971)](https://doi.org/110.1080/0022250X.1971.9989795) * Added `white_blau()` function to compute an index of spatial proximity (*SP*) based on [White (1986)](https://doi.org/10.2307/3644339) and Blau (1977; ISBN-13:978-0-029-03660-0) - * Added `geo_large = 'place'` for census-designated places, `geo_large = 'cbsa'` for core-based statistical areas, `geo_large = 'csa'` for combined statistical areas, and `geo_large = 'metro'` for metropolitan divisions as the larger geographical unit in `atkinson()`, `bell()`, `bemanian_beyer()`, `denton()`, `duncan()`, `duncan_cuzzort()`, `duncan_duncan()`, `hoover()`, `james_taeuber()`, `lieberson()`, `massey()`, `sudano()`, `theil()`, and `white()`, `white_blau()` functions. + * Added `geo_large = 'place'` for census-designated places, `geo_large = 'cbsa'` for core-based statistical areas, `geo_large = 'csa'` for combined statistical areas, and `geo_large = 'metro'` for metropolitan divisions as the larger geographical unit in `atkinson()`, `bell()`, `bemanian_beyer()`, `denton()`, `duncan()`, `duncan_cuzzort()`, `duncan_duncan()`, `hoover()`, `james_taeuber()`, `lieberson()`, `massey()`, `massey_duncan()`, `sudano()`, `theil()`, and `white()`, `white_blau()` functions. * Added census block group computation for `anthopolos()` by specifying `geo == 'cbg'` or `geo == 'block group'` * Added `holder` argument to `atkinson()` function to toggle the computation with or without the Hölder mean. The function can now compute *A* without the Hölder mean. The default is `holder = FALSE`. * Added `crs` argument to `anthopolos()`, `bravo()`, and `white_blau()` functions to provide spatial projection of the distance-based metrics @@ -28,7 +29,7 @@ * Renamed 'race/ethnicity' or 'racial/ethnic' to 'race or ethnicity' or 'racial or ethnic' throughout documentation to use more modern, inclusive, and appropriate language * Updated documentation about value range of *V* (White) from `{0 to 1}` to `{-Inf to Inf}` * Split up vignette into three separate vignettes: 'ndi1', 'ndi2', and 'ndi3' for the *NDI*, racial or ethnic residential segregation, and additional socioeconomic disparity indices, respectively - * Added examples for `atkinson()`, `duncan_cuzzort()`, `duncan_duncan()`, `gini()`, `hoover()`, `james_taeuber()`, `lieberson()`, `massey()`, `theil()`, and `white_blau()` functions in vignettes and README + * Added examples for `atkinson()`, `duncan_cuzzort()`, `duncan_duncan()`, `gini()`, `hoover()`, `james_taeuber()`, `lieberson()`, `massey()`, `massey_duncan()`, `theil()`, and `white_blau()` functions in vignettes and README * Added example for `holder` argument in `atkinson()` function in README * Reordered the README examples alphabetically * Reordered the vignette examples to group the racial or ethnic residential segregation indices @@ -45,7 +46,7 @@ * * -* Some tests and examples for `anthopolos()`, `atkinson()`, `bell()`, `bemanian_beyer()`, `bravo()`, `denton()`, `duncan()`, `duncan_cuzzort()`, `duncan_duncan()`, `gini()`, `hoover()`, `james_taeuber()`, `krieger()`, `lieberson()`, `massey()`, `messer()`, `powell_wiley()`, `sudano()`, `theil()`, `white()`, and `white_blau()` functions require a Census API key so they are skipped if NULL or not run +* Some tests and examples for `anthopolos()`, `atkinson()`, `bell()`, `bemanian_beyer()`, `bravo()`, `denton()`, `duncan()`, `duncan_cuzzort()`, `duncan_duncan()`, `gini()`, `hoover()`, `james_taeuber()`, `krieger()`, `lieberson()`, `massey()`, `massey_duncan()`, `messer()`, `powell_wiley()`, `sudano()`, `theil()`, `white()`, and `white_blau()` functions require a Census API key so they are skipped if NULL or not run ## Test environments * local Windows install, R 4.4.1 diff --git a/inst/CITATION b/inst/CITATION index b05ec2b..36430fb 100755 --- a/inst/CITATION +++ b/inst/CITATION @@ -3,7 +3,7 @@ bibentry(bibtype = 'manual', author = as.person('Ian D. Buller'), publisher = 'The Comprehensive R Archive Network', year = '2024', - number = '0.1.6.9010.', + number = '0.1.6.9011.', doi = '10.5281/zenodo.6989030', url = 'https://cran.r-project.org/package=ndi', @@ -422,6 +422,44 @@ bibentry(bibtype = 'Article', header = 'If you computed ACL (Massey & Denton) values, please also cite:' ) +bibentry(bibtype = 'Article', + title = 'The Dimensions of Residential Segregation', + author = c(as.person('Douglas S. Massey'), + as.person('Nancy A. Denton')), + journal = 'Social Forces', + year = '1988', + volume = '67', + issue = '2', + pages = '281--315', + doi = '10.1093/sf/67.2.281', + + textVersion = + paste('Douglas S. Massey & Nancy A. Denton (1988).', + 'The Dimensions of Residential Segregation.', + 'Social Forces, 67(1), 281-315.', + 'DOI:10.1093/sf/67.2.281'), + + header = 'If you computed ACO (Massey & Denton) values, please also cite (1):' +) + +bibentry(bibtype = 'Book', + title = 'Statistical Geography: Problems in Analyzing Area Data', + author = c(as.person('Otis D. Duncan'), + as.person('Ray P. Cuzzort'), + as.person('Beverly Duncan')), + year = '1961', + publisher = 'Free Press', + lc = '60007089', + + textVersion = + paste('Otis D. Duncan, Ray P. Cuzzort, & Beverly Duncan (1961).', + 'Statistical Geography: Problems in Analyzing Area Data.', + 'Free Press', + 'LC:60007089'), + + header = 'And (2):' +) + bibentry(bibtype = 'Article', title = 'The Dimensions of Residential Segregation', author = c(as.person('Douglas S. Massey'), diff --git a/man/figures/aco.png b/man/figures/aco.png new file mode 100644 index 0000000..4a964c8 Binary files /dev/null and b/man/figures/aco.png differ diff --git a/man/massey_duncan.Rd b/man/massey_duncan.Rd new file mode 100644 index 0000000..98f9fdf --- /dev/null +++ b/man/massey_duncan.Rd @@ -0,0 +1,96 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/massey_duncan.R +\name{massey_duncan} +\alias{massey_duncan} +\title{Absolute Concentration based on Massey & Denton (1988) and Duncan, Cuzzort, & Duncan (1961)} +\usage{ +massey_duncan( + geo_large = "county", + geo_small = "tract", + year = 2020, + subgroup, + omit_NAs = TRUE, + quiet = FALSE, + ... +) +} +\arguments{ +\item{geo_large}{Character string specifying the larger geographical unit of the data. The default is counties \code{geo_large = 'county'}.} + +\item{geo_small}{Character string specifying the smaller geographical unit of the data. The default is census tracts \code{geo_small = 'tract'}.} + +\item{year}{Numeric. The year to compute the estimate. The default is 2020, and the years 2009 onward are currently available.} + +\item{subgroup}{Character string specifying the racial or ethnic subgroup(s) as the comparison population. See Details for available choices.} + +\item{omit_NAs}{Logical. If FALSE, will compute index for a larger geographical unit only if all of its smaller geographical units have values. The default is TRUE.} + +\item{quiet}{Logical. If TRUE, will display messages about potential missing census information. The default is FALSE.} + +\item{...}{Arguments passed to \code{\link[tidycensus]{get_acs}} to select state, county, and other arguments for census characteristics} +} +\value{ +An object of class 'list'. This is a named list with the following components: + +\describe{ +\item{\code{aco}}{An object of class 'tbl' for the GEOID, name, and \emph{ACO} at specified larger census geographies.} +\item{\code{aco_data}}{An object of class 'tbl' for the raw census values at specified smaller census geographies.} +\item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute \emph{ACO}.} +} +} +\description{ +Compute the aspatial Absolute Concentration (Massey & Denton) of a selected racial or ethnic subgroup(s) and U.S. geographies. +} +\details{ +This function will compute the aspatial Absolute Concentration (\emph{ACO}) of selected racial or ethnic subgroups and U.S. geographies for a specified geographical extent (e.g., the entire U.S. or a single state) based on Massey & Denton (1988) \doi{10.1093/sf/67.2.281} and Duncan, Cuzzort, & Duncan (1961; LC:60007089). This function provides the computation of \emph{ACO} for any of the U.S. Census Bureau race or ethnicity subgroups (including Hispanic and non-Hispanic individuals). + +The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the computation. The yearly estimates are available for 2009 onward when ACS-5 data are available (2010 onward for \code{geo_large = 'cbsa'} and 2011 onward for \code{geo_large = 'place'}, \code{geo_large = 'csa'}, or \code{geo_large = 'metro'}) but may be available from other U.S. Census Bureau surveys. The twenty racial or ethnic subgroups (U.S. Census Bureau definitions) are: +\itemize{ +\item \strong{B03002_002}: not Hispanic or Latino \code{'NHoL'} +\item \strong{B03002_003}: not Hispanic or Latino, white alone \code{'NHoLW'} +\item \strong{B03002_004}: not Hispanic or Latino, Black or African American alone \code{'NHoLB'} +\item \strong{B03002_005}: not Hispanic or Latino, American Indian and Alaska Native alone \code{'NHoLAIAN'} +\item \strong{B03002_006}: not Hispanic or Latino, Asian alone \code{'NHoLA'} +\item \strong{B03002_007}: not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'NHoLNHOPI'} +\item \strong{B03002_008}: not Hispanic or Latino, Some other race alone \code{'NHoLSOR'} +\item \strong{B03002_009}: not Hispanic or Latino, Two or more races \code{'NHoLTOMR'} +\item \strong{B03002_010}: not Hispanic or Latino, Two races including Some other race \code{'NHoLTRiSOR'} +\item \strong{B03002_011}: not Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'NHoLTReSOR'} +\item \strong{B03002_012}: Hispanic or Latino \code{'HoL'} +\item \strong{B03002_013}: Hispanic or Latino, white alone \code{'HoLW'} +\item \strong{B03002_014}: Hispanic or Latino, Black or African American alone \code{'HoLB'} +\item \strong{B03002_015}: Hispanic or Latino, American Indian and Alaska Native alone \code{'HoLAIAN'} +\item \strong{B03002_016}: Hispanic or Latino, Asian alone \code{'HoLA'} +\item \strong{B03002_017}: Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'HoLNHOPI'} +\item \strong{B03002_018}: Hispanic or Latino, Some other race alone \code{'HoLSOR'} +\item \strong{B03002_019}: Hispanic or Latino, Two or more races \code{'HoLTOMR'} +\item \strong{B03002_020}: Hispanic or Latino, Two races including Some other race \code{'HoLTRiSOR'} +\item \strong{B03002_021}: Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'HoLTReSOR'} +} + +Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. + +\emph{ACO} is a measure of concentration of racial or ethnic populations within smaller geographical units that are located within larger geographical units. \emph{ACO} can range from 0 to 1 and represents the relative amount of physical space occupied by a racial or ethnic subgroup in a larger geographical unit. A value of 1 indicates that a racial or ethnic subgroup has achieved the maximum spatial concentration possible (all racial or ethnic subgroup members live in the smallest of the smaller geographical units). A value of 0 indicates the maximum deconcentration possible (all racial or ethnic subgroup members live in the largest of the smaller geographical units). + +Larger geographical units available include states \code{geo_large = 'state'}, counties \code{geo_large = 'county'}, census tracts \code{geo_large = 'tract'}, census-designated places \code{geo_large = 'place'}, core-based statistical areas \code{geo_large = 'cbsa'}, combined statistical areas \code{geo_large = 'csa'}, and metropolitan divisions \code{geo_large = 'metro'}. Smaller geographical units available include, counties \code{geo_small = 'county'}, census tracts \code{geo_small = 'tract'}, and census block groups \code{geo_small = 'cbg'}. If a larger geographical unit is comprised of only one smaller geographical unit (e.g., a U.S county contains only one census tract), then the \emph{ACO} value returned is NA. If the larger geographical unit is census-designated places \code{geo_large = 'place'}, core-based statistical areas \code{geo_large = 'cbsa'}, combined statistical areas \code{geo_large = 'csa'}, or metropolitan divisions \code{geo_large = 'metro'}, only the smaller geographical units completely within a larger geographical unit are considered in the \emph{V} computation (see internal \code{\link[sf]{st_within}} function for more information) and recommend specifying all states within which the interested larger geographical unit are located using the internal \code{state} argument to ensure all appropriate smaller geographical units are included in the \emph{ACO} computation. +} +\examples{ +\dontrun{ +# Wrapped in \dontrun{} because these examples require a Census API key. + + # Index of spatial proximity of Black populations + ## of census tracts within counties within Georgia, U.S.A., counties (2020) + massey_duncan( + geo_large = 'county', + geo_small = 'tract', + state = 'GA', + year = 2020, + subgroup = c('NHoLB', 'HoLB') + ) + +} + +} +\seealso{ +\code{\link[tidycensus]{get_acs}} for additional arguments for geographic extent selection (i.e., \code{state} and \code{county}). +} diff --git a/man/ndi-package.Rd b/man/ndi-package.Rd index aad0b77..9969850 100644 --- a/man/ndi-package.Rd +++ b/man/ndi-package.Rd @@ -43,12 +43,12 @@ Key content of the 'ndi' package include:\cr \code{\link{james_taeuber}} Computes the aspatial Dissimilarity Index (\emph{D}) based on James & Taeuber (1985) \doi{10.2307/270845}. -\code{\link{krieger}} Computes the aspatial Index of Concentration at the Extremes based on Feldman et al. (2015) \doi{10.1136/jech-2015-205728} and Krieger et al. (2016) \doi{10.2105/AJPH.2015.302955}. - \code{\link{lieberson}} Computes the aspatial Isolation Index (\emph{xPx\*}) based on Lieberson (1981; ISBN-13:978-1-032-53884-6) and Bell (1954) \doi{10.2307/2574118}. \code{\link{massey}} Computes the aspatial Absolute Clustering (\emph{ACL}) based on Massey & Denton (1988) \doi{10.1093/sf/67.2.281}. +\code{\link{massey_duncan}} Computes the aspatial Absolute Concentration (\emph{ACO}) based on Massey & Denton (1988) \doi{10.1093/sf/67.2.281} and Duncan, Cuzzort, & Duncan (1961; LC:60007089). + \code{\link{sudano}} Computes the aspatial Location Quotient (\emph{LQ}) based on Merton (1939) \doi{10.2307/2084686} and Sudano et al. (2013) \doi{10.1016/j.healthplace.2012.09.015}. \code{\link{theil}} Computes the aspatial Entropy (\emph{H}) based on Theil (1972; ISBN-13:978-0-444-10378-9) and Theil & Finizza (1971) \doi{110.1080/0022250X.1971.9989795}. @@ -65,6 +65,8 @@ Key content of the 'ndi' package include:\cr \code{\link{gini}} Also retrieves the aspatial Gini Index (\emph{G}) of income inequality based on Gini (1921) \doi{10.2307/2223319}. +\code{\link{krieger}} Computes the aspatial Index of Concentration at the Extremes based on Feldman et al. (2015) \doi{10.1136/jech-2015-205728} and Krieger et al. (2016) \doi{10.2105/AJPH.2015.302955}. + \strong{Pre-formatted U.S. Census Data} \code{\link{DCtracts2020}} A sample dataset containing information about U.S. Census American Community Survey 5-year estimate data for the District of Columbia census tracts (2020). The data are obtained from the \code{\link[tidycensus]{get_acs}} function and formatted for the \code{\link{messer}} and \code{\link{powell_wiley}} functions input. diff --git a/tests/testthat/test-massey_duncan.R b/tests/testthat/test-massey_duncan.R new file mode 100644 index 0000000..6f85169 --- /dev/null +++ b/tests/testthat/test-massey_duncan.R @@ -0,0 +1,87 @@ +context('massey_duncan') + +# ---------------------- # +# massey_duncan testthat # +# ---------------------- # + +test_that('massey_duncan throws error with invalid arguments', { + # Unavailable geography + expect_error( + massey_duncan( + geo_small = 'zcta', + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + ) + ) + expect_error( + massey_duncan( + geo_large = 'block group', + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + ) + ) + + # Unavailable year + expect_error( + massey_duncan( + state = 'DC', + year = 2005, + subgroup = 'NHoLB', + quiet = TRUE + ) + ) + + # Unavailable subgroup + expect_error( + massey_duncan( + state = 'DC', + year = 2020, + subgroup = 'terran', + quiet = TRUE + ) + ) + + skip_if(Sys.getenv('CENSUS_API_KEY') == '') + + # Incorrect state + expect_error( + massey_duncan( + state = 'AB', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + ) + ) + +}) + +test_that('massey_duncan works', { + skip_if(Sys.getenv('CENSUS_API_KEY') == '') + + expect_silent(massey_duncan( + state = 'DC', + year = 2020, + subgroup = c('NHoLB', 'HoLB'), + )) + + expect_silent( + massey_duncan( + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + ) + ) + + expect_silent(massey_duncan( + state = 'DC', + year = 2020, + subgroup = c('NHoLB', 'HoLB'), + quiet = TRUE + )) + +}) diff --git a/vignettes/ndi2.Rmd b/vignettes/ndi2.Rmd index 5ae8b4c..6bec3eb 100644 --- a/vignettes/ndi2.Rmd +++ b/vignettes/ndi2.Rmd @@ -57,8 +57,8 @@ Since version v0.1.1, the [*ndi*](https://CRAN.R-project.org/package=ndi) packag **Concentration**: * `hoover()` function that computes Delta (*DEL*) based on [Hoover (1941)](https://doi.org/10.1017/S0022050700052980) and Duncan, Cuzzort, & Duncan (1961; LC:60007089) - * `massey()` function that compute Absolute Clustering (*ACL*) based on [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281) - * `denton()` function that compute Relative Clustering (*RCL*) based on [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281) + * `massey_duncan()` function that computes Absolute Concentration (*ACO*) based on [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281) and Duncan, Cuzzort, & Duncan (1961; LC:60007089) + * Relative Concentration (Planned) **Centralization**: @@ -67,9 +67,9 @@ Since version v0.1.1, the [*ndi*](https://CRAN.R-project.org/package=ndi) packag **Clustering**: - * Absolute Clustering (Planned) + * `massey()` function that compute Absolute Clustering (*ACL*) based on [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281) * `white_blau()` function that computes an index of spatial proximity (*SP*) based on [White (1986)](https://doi.org/10.2307/3644339) and Blau (1977; ISBN-13:978-0-029-03660-0) - * Relative Clustering (Planned) + * `denton()` function that compute Relative Clustering (*RCL*) based on [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281) * Distance Decay Interaction (Planned) * Distance Decay Isolation (Planned) @@ -745,120 +745,59 @@ ggplot() + ) ``` -#### Compute Absolute Clustering (*ACL*) +#### Compute Absolute Concentration (*ACO*) -Compute the racial or ethnic *ACL* values (2014-2018 5-year ACS) for census block groups within census tracts of Harris County, TX. This metric is based on [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281). *ACL* is a measure of clustering of racial or ethnic populations within smaller geographical units that are located within larger geographical units. *ACL* can range in value from 0 to Inf and represents the degree to which an area is a racial or ethnic enclave. A value of 1 indicates there is no differential clustering of the racial or ethnic subgroup. A value greater than 1 indicates the racial or ethnic subgroup live nearer to one another. A value less than 1 indicates the racial or ethnic subgroup do not live near one another. +Compute the racial or ethnic *ACO* values (2015-2019 5-year ACS) for census tracts within core-based statistical areas of Wisconsin. This metric is based on [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281) and Duncan, Cuzzort, & Duncan (1961; LC:60007089). *ACO* is a measure of concentration of racial or ethnic populations within smaller geographical units that are located within larger geographical units. *ACO* can range from 0 to 1 and represents the relative amount of physical space occupied by a racial or ethnic subgroup in a larger geographical unit. A value of 1 indicates that a racial or ethnic subgroup has achieved the maximum spatial concentration possible (all racial or ethnic subgroup members live in the smallest of the smaller geographical units). A value of 0 indicates the maximum deconcentration possible (all racial or ethnic subgroup members live in the largest of the smaller geographical units). -```{r massey_prep, results = 'hide'} -massey2018HTX <- massey( - geo_large = 'tract', - geo_small = 'cbg', - state = 'TX', - county = 'Harris County', - year = 2018, +```{r massey_duncan_prep, results = 'hide'} +massey_duncan2019WI <- massey_duncan( + geo_large = 'cbsa', + geo_small = 'tract', + state = c('WI', 'IL', 'MI', 'MN'), + year = 2019, subgroup = c('NHoLB', 'HoLB') ) -# Obtain the 2018 census tracts in Harris County, TX, from the 'tigris' package -tract2018 <- tracts(year = 2018, state = 'TX') -# Obtain the 2018 Texas counties from the 'tigris' package -county2018 <- counties(state = 'TX', year = 2018, cb = TRUE) +# Obtain the 2019 census-designated places from the 'tigris' package +cbsa2019 <- core_based_statistical_areas(year = 2019, cb = TRUE) +# Obtain the 2019 state from the 'tigris' package +states2019 <- states(year = 2019, cb = TRUE) -# Join the ACL values to the census tract geometries and filter for Harris County, TX -HTX2010massey <- tract2018 %>% - left_join(massey2018HTX$acl, by = 'GEOID') %>% +# Join the ACO values to the census-designated places geometries and filter for Connecticut +WI2019massey_duncan <- cbsa2019 %>% + left_join(massey_duncan2019WI$aco, by = 'GEOID') %>% filter(!st_is_empty(.)) %>% - filter(!is.na(ACL)) %>% - st_filter(county2018 %>% filter(NAME == 'Harris')) %>% + filter(!is.na(ACO)) %>% + st_filter(states2019 %>% filter(STUSPS == 'WI'), .predicate = st_within) %>% st_make_valid() ``` -```{r massey_plot, fig.height = 4, fig.width = 7} -# Visualize the ACL values (2013-2017 5-year ACS) for census block groups within census tracts of Harris County, TX +```{r massey_duncan_plot, fig.height = 4, fig.width = 7} +# Visualize the ACO values (2015-2019 5-year ACS) for census tracts within core-based statistical areas of Wisconsin. ggplot() + geom_sf( - data = HTX2010massey, - aes(fill = ACL) + data = WI2019massey_duncan, + aes(fill = ACO) ) + geom_sf( - data = county2018 %>% filter(NAME == 'Harris County'), + data = states2019 %>% filter(STUSPS == 'WI'), fill = 'transparent', color = 'black', size = 0.2 ) + theme_minimal() + - scale_fill_gradient2( - low = '#998ec3', - mid = '#f7f7f7', - high = '#f1a340', - midpoint = 0 - ) + - labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2013-2017 estimates') + + scale_fill_viridis_c(limits = c(0, 1)) + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2015-2019 estimates') + ggtitle( - 'Absolute Clustering (Massey & Denton)\nCensus block groups within census tracts of Harris County, TX', + 'Absolute Concentration (Massey & Duncan)\ncensus tracts within core-based statistical areas of Wisconsin', subtitle = 'Black population' ) ``` -#### Compute Relative Clustering (*RCL*) - -Compute the racial or ethnic *RCL* values (2014-2018 5-year ACS) for census block groups within census tracts of Harris County, TX. This metric is based on [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281). *RCL* equals 0 when the racial or ethnic subgroup population displays the same amount of clustering as the referent racial or ethnic subgroup population, and is positive whenever the racial or ethnic subgroup population members display greater clustering than is typical of the the referent racial or ethnic subgroup population. If the racial or ethnic subgroup population members were less clustered than the the referent racial or ethnic subgroup population, then *RCL* would be negative. - -```{r denton_prep, results = 'hide'} -denton2018HTX <- denton( - geo_large = 'tract', - geo_small = 'cbg', - state = 'TX', - county = 'Harris County', - year = 2018, - subgroup = 'NHoLB', - subgroup_ref = 'NHoLW' -) - -# Obtain the 2018 census tracts in Harris County, TX, from the 'tigris' package -tract2018 <- tracts(year = 2018, state = 'TX') -# Obtain the 2018 Texas counties from the 'tigris' package -county2018 <- counties(state = 'TX', year = 2018, cb = TRUE) - -# Join the RCL values to the census tract geometries and filter for Harris County, TX -HTX2010denton <- tract2018 %>% - left_join(denton2018HTX$rcl, by = 'GEOID') %>% - filter(!st_is_empty(.)) %>% - filter(!is.na(RCL)) %>% - st_filter(county2018 %>% filter(NAME == 'Harris')) %>% - st_make_valid() -``` - -```{r denton_plot, fig.height = 4, fig.width = 7} -# Visualize the RCL values (2013-2017 5-year ACS) for census block groups within census tracts of Harris County, TX -ggplot() + - geom_sf( - data = HTX2010denton, - aes(fill = RCL) - ) + - geom_sf( - data = county2018 %>% filter(NAME == 'Harris County'), - fill = 'transparent', - color = 'black', - size = 0.2 - ) + - theme_minimal() + - scale_fill_gradient2( - low = '#998ec3', - mid = '#f7f7f7', - high = '#f1a340', - midpoint = 0 - ) + - labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2013-2017 estimates') + - ggtitle( - 'Relative Clustering (Massey & Denton)\nCensus block groups within census tracts of Harris County, TX', - subtitle = 'Black non-Hispanic vs. white non-Hispanic' - ) -``` - #### Compute Absolute Centralization (*ACE*) -Compute the racial or ethnic *ACE* values (2013-2017 5-year ACS) for census block groups within census-designated places of Connecticut. This metric is based on Duncan, Cuzzort, & Duncan (1961; LC:60007089) and [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281). *ACE* is a measure of the degree to which racial or ethnic populations within smaller geographical units are located near the center of a larger geographical unit. *ACE* can range in value from -1 to 1 and represents the spatial distribution of racial or ethnic populations within smaller geographical units compared to the distribution of land area around the center of a larger geographical unit. Positive values indicate a tendency for racial or ethnic populations to reside close to the center of a larger geographical unit, while negative values indicate a tendency to live in outlying areas. A score of 0 means that racial or ethnic populations have a uniform distribution throughout a larger geographical unit. *ACE* gives the proportion of racial or ethnic populations required to change residence to achieve a uniform distribution of population around the center of a larger geographical unit. +Compute the racial or ethnic *ACE* values (2013-2017 5-year ACS) for census block groups within census-designated places of Connecticut. This metric is based on Duncan, Cuzzort, & Duncan (1961; LC:60007089) and [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281). *ACE* is a measure of the degree to which racial or ethnic populations within smaller geographical units are located near the center of a larger geographical unit. *ACO* is a measure of concentration of racial or ethnic populations within smaller geographical units that are located within larger geographical units. *ACO* can range from 0 to 1 and represents the relative amount of physical space occupied by a racial or ethnic subgroup in a larger geographical unit. A value of 1 indicates that a racial or ethnic subgroup has achieved the maximum spatial concentration possible (all racial or ethnic subgroup members live in the smallest of the smaller geographical units). A value of 0 indicates the maximum deconcentration possible (all racial or ethnic subgroup members live in the largest of the smaller geographical units). + Note: The original metric used the location of the central business district (CBD) to compute the metric, but the U.S. Census Bureau has not defined CBDs for U.S. cities since the [1982 Census of Retail Trade](http://www.census.gov/geo/www/cbd.html). Therefore, this function uses the the centroids of each larger geographical unit as the 'centre', but may not represent the current CBD. @@ -971,6 +910,61 @@ ggplot() + ) ``` +#### Compute Absolute Clustering (*ACL*) + +Compute the racial or ethnic *ACL* values (2014-2018 5-year ACS) for census block groups within census tracts of Harris County, TX. This metric is based on [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281). *ACL* is a measure of clustering of racial or ethnic populations within smaller geographical units that are located within larger geographical units. *ACL* can range in value from 0 to Inf and represents the degree to which an area is a racial or ethnic enclave. A value of 1 indicates there is no differential clustering of the racial or ethnic subgroup. A value greater than 1 indicates the racial or ethnic subgroup live nearer to one another. A value less than 1 indicates the racial or ethnic subgroup do not live near one another. + +```{r massey_prep, results = 'hide'} +massey2018HTX <- massey( + geo_large = 'tract', + geo_small = 'cbg', + state = 'TX', + county = 'Harris County', + year = 2018, + subgroup = c('NHoLB', 'HoLB') +) + +# Obtain the 2018 census tracts in Harris County, TX, from the 'tigris' package +tract2018 <- tracts(year = 2018, state = 'TX') +# Obtain the 2018 Texas counties from the 'tigris' package +county2018 <- counties(state = 'TX', year = 2018, cb = TRUE) + +# Join the ACL values to the census tract geometries and filter for Harris County, TX +HTX2010massey <- tract2018 %>% + left_join(massey2018HTX$acl, by = 'GEOID') %>% + filter(!st_is_empty(.)) %>% + filter(!is.na(ACL)) %>% + st_filter(county2018 %>% filter(NAME == 'Harris')) %>% + st_make_valid() +``` + +```{r massey_plot, fig.height = 4, fig.width = 7} +# Visualize the ACL values (2013-2017 5-year ACS) for census block groups within census tracts of Harris County, TX +ggplot() + + geom_sf( + data = HTX2010massey, + aes(fill = ACL) + ) + + geom_sf( + data = county2018 %>% filter(NAME == 'Harris County'), + fill = 'transparent', + color = 'black', + size = 0.2 + ) + + theme_minimal() + + scale_fill_gradient2( + low = '#998ec3', + mid = '#f7f7f7', + high = '#f1a340', + midpoint = 0 + ) + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2013-2017 estimates') + + ggtitle( + 'Absolute Clustering (Massey & Denton)\nCensus block groups within census tracts of Harris County, TX', + subtitle = 'Black population' + ) +``` + #### Compute an index of spatial proximity (*SP*) Compute an index of spatial proximity (2010-2014 5-year ACS) for census tracts within combined statistical areas of Georgia. This metric is based on [White (1986)](https://doi.org/10.2307/3644339) and Blau (1977; ISBN-13:978-0-029-03660-0) that designed the metric to identify racial or ethnic enclaves. *SP* is a measure of clustering of racial or ethnic populations within smaller geographical areas that are located within larger geographical areas. *SP* can range in value from 0 to Inf and represents the degree to which an area is a racial or ethnic enclave. A value of 1 indicates there is no differential clustering between subgroup and referent group members. A value greater than 1 indicates subgroup members live nearer to one another than to referent subgroup members. A value less than 1 indicates subgroup live nearer to and referent subgroup members than to their own subgroup members. @@ -1028,6 +1022,62 @@ ggplot() + ) ``` +#### Compute Relative Clustering (*RCL*) + +Compute the racial or ethnic *RCL* values (2014-2018 5-year ACS) for census block groups within census tracts of Harris County, TX. This metric is based on [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281). *RCL* equals 0 when the racial or ethnic subgroup population displays the same amount of clustering as the referent racial or ethnic subgroup population, and is positive whenever the racial or ethnic subgroup population members display greater clustering than is typical of the the referent racial or ethnic subgroup population. If the racial or ethnic subgroup population members were less clustered than the the referent racial or ethnic subgroup population, then *RCL* would be negative. + +```{r denton_prep, results = 'hide'} +denton2018HTX <- denton( + geo_large = 'tract', + geo_small = 'cbg', + state = 'TX', + county = 'Harris County', + year = 2018, + subgroup = 'NHoLB', + subgroup_ref = 'NHoLW' +) + +# Obtain the 2018 census tracts in Harris County, TX, from the 'tigris' package +tract2018 <- tracts(year = 2018, state = 'TX') +# Obtain the 2018 Texas counties from the 'tigris' package +county2018 <- counties(state = 'TX', year = 2018, cb = TRUE) + +# Join the RCL values to the census tract geometries and filter for Harris County, TX +HTX2010denton <- tract2018 %>% + left_join(denton2018HTX$rcl, by = 'GEOID') %>% + filter(!st_is_empty(.)) %>% + filter(!is.na(RCL)) %>% + st_filter(county2018 %>% filter(NAME == 'Harris')) %>% + st_make_valid() +``` + +```{r denton_plot, fig.height = 4, fig.width = 7} +# Visualize the RCL values (2013-2017 5-year ACS) for census block groups within census tracts of Harris County, TX +ggplot() + + geom_sf( + data = HTX2010denton, + aes(fill = RCL) + ) + + geom_sf( + data = county2018 %>% filter(NAME == 'Harris County'), + fill = 'transparent', + color = 'black', + size = 0.2 + ) + + theme_minimal() + + scale_fill_gradient2( + low = '#998ec3', + mid = '#f7f7f7', + high = '#f1a340', + midpoint = 0 + ) + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2013-2017 estimates') + + ggtitle( + 'Relative Clustering (Massey & Denton)\nCensus block groups within census tracts of Harris County, TX', + subtitle = 'Black non-Hispanic vs. white non-Hispanic' + ) +``` + ```{r system} sessionInfo() ``` diff --git a/vignettes/ndi2.html b/vignettes/ndi2.html index 8eabdb9..28749dd 100644 --- a/vignettes/ndi2.html +++ b/vignettes/ndi2.html @@ -12,7 +12,7 @@ - + 2. Racial or Ethnic Residential Segregation Indices @@ -341,7 +341,7 @@

2. Racial or Ethnic Residential Segregation Indices

Ian D. Buller (GitHub: @idblr)

-

2024-08-30

+

2024-08-31

@@ -409,12 +409,10 @@

Racial or Ethnic Residential Segregation Indices

  • hoover() function that computes Delta (DEL) based on Hoover (1941) and Duncan, Cuzzort, & Duncan (1961; LC:60007089)
  • -
  • massey() function that compute Absolute Clustering -(ACL) based on Massey & Denton -(1988)
  • -
  • denton() function that compute Relative Clustering -(RCL) based on Massey & Denton -(1988)
  • +
  • massey_duncan() function that computes Absolute +Concentration (ACO) based on Massey & Denton +(1988) and Duncan, Cuzzort, & Duncan (1961; LC:60007089)
  • +
  • Relative Concentration (Planned)
  • Centralization:

    Clustering:

    @@ -1306,110 +1308,116 @@

    Compute Delta (DEL)

    )

    -
    -

    Compute Absolute Clustering (ACL)

    -

    Compute the racial or ethnic ACL values (2014-2018 5-year -ACS) for census block groups within census tracts of Harris County, TX. +

    +

    Compute Absolute Concentration (ACO)

    +

    Compute the racial or ethnic ACO values (2015-2019 5-year +ACS) for census tracts within core-based statistical areas of Wisconsin. This metric is based on Massey & Denton -(1988). ACL is a measure of clustering of racial or ethnic +(1988) and Duncan, Cuzzort, & Duncan (1961; LC:60007089). +ACO is a measure of concentration of racial or ethnic populations within smaller geographical units that are located within -larger geographical units. ACL can range in value from 0 to Inf -and represents the degree to which an area is a racial or ethnic -enclave. A value of 1 indicates there is no differential clustering of -the racial or ethnic subgroup. A value greater than 1 indicates the -racial or ethnic subgroup live nearer to one another. A value less than -1 indicates the racial or ethnic subgroup do not live near one -another.

    -
    massey2018HTX <- massey(
    -  geo_large = 'tract',
    -  geo_small = 'cbg',
    -  state = 'TX',
    -  county = 'Harris County',
    -  year = 2018,
    -  subgroup = c('NHoLB', 'HoLB')
    -)
    -
    -# Obtain the 2018 census tracts in Harris County, TX, from the 'tigris' package
    -tract2018 <- tracts(year = 2018, state = 'TX')
    -# Obtain the 2018 Texas counties from the 'tigris' package
    -county2018 <- counties(state = 'TX', year = 2018, cb = TRUE)
    -
    -# Join the ACL values to the census tract geometries and filter for Harris County, TX
    -HTX2010massey <- tract2018 %>%
    -  left_join(massey2018HTX$acl, by = 'GEOID') %>%
    -  filter(!st_is_empty(.)) %>%
    -  filter(!is.na(ACL)) %>%
    -  st_filter(county2018 %>% filter(NAME == 'Harris')) %>%
    -  st_make_valid()
    -
    # Visualize the ACL values (2013-2017 5-year ACS) for census block groups within census tracts of Harris County, TX
    +larger geographical units. ACO can range from 0 to 1 and
    +represents the relative amount of physical space occupied by a racial or
    +ethnic subgroup in a larger geographical unit. A value of 1 indicates
    +that a racial or ethnic subgroup has achieved the maximum spatial
    +concentration possible (all racial or ethnic subgroup members live in
    +the smallest of the smaller geographical units). A value of 0 indicates
    +the maximum deconcentration possible (all racial or ethnic subgroup
    +members live in the largest of the smaller geographical units).

    +
    massey_duncan2019WI <- massey_duncan(
    +  geo_large = 'cbsa',
    +  geo_small = 'tract',
    +  state = c('WI', 'IL', 'MI', 'MN'),
    +  year = 2019,
    +  subgroup = c('NHoLB', 'HoLB')
    +)
    +
    +# Obtain the 2019 census-designated places from the 'tigris' package
    +cbsa2019 <- core_based_statistical_areas(year = 2019, cb = TRUE)
    +# Obtain the 2019 state from the 'tigris' package
    +states2019 <- states(year = 2019, cb = TRUE)
    +
    +# Join the ACO values to the census-designated places geometries and filter for Connecticut
    +WI2019massey_duncan <- cbsa2019 %>%
    +  left_join(massey_duncan2019WI$aco, by = 'GEOID') %>%
    +  filter(!st_is_empty(.)) %>%
    +  filter(!is.na(ACO)) %>%
    +  st_filter(states2019 %>% filter(STUSPS == 'WI'), .predicate = st_within) %>%
    +  st_make_valid()
    +
    # Visualize the ACO values (2015-2019 5-year ACS) for census tracts within core-based statistical areas of Wisconsin.
     ggplot() +
       geom_sf(
    -    data = HTX2010massey,
    -    aes(fill = ACL)
    +    data = WI2019massey_duncan,
    +    aes(fill = ACO)
       ) +
       geom_sf(
    -    data = county2018 %>% filter(NAME == 'Harris County'),
    +    data = states2019 %>% filter(STUSPS == 'WI'),
         fill = 'transparent',
         color = 'black',
         size = 0.2
       ) +
       theme_minimal() +
    -  scale_fill_gradient2(
    -    low = '#998ec3', 
    -    mid = '#f7f7f7', 
    -    high = '#f1a340', 
    -    midpoint = 0
    -  ) +
    -  labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2013-2017 estimates') +
    -  ggtitle(
    -    'Absolute Clustering (Massey & Denton)\nCensus block groups within census tracts of Harris County, TX',
    -    subtitle = 'Black population'
    -  )
    -

    + scale_fill_viridis_c(limits = c(0, 1)) + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2015-2019 estimates') + + ggtitle( + 'Absolute Concentration (Massey & Duncan)\ncensus tracts within core-based statistical areas of Wisconsin', + subtitle = 'Black population' + )
    +

    -
    -

    Compute Relative Clustering (RCL)

    -

    Compute the racial or ethnic RCL values (2014-2018 5-year -ACS) for census block groups within census tracts of Harris County, TX. -This metric is based on Massey & Denton -(1988). RCL equals 0 when the racial or ethnic subgroup -population displays the same amount of clustering as the referent racial -or ethnic subgroup population, and is positive whenever the racial or -ethnic subgroup population members display greater clustering than is -typical of the the referent racial or ethnic subgroup population. If the -racial or ethnic subgroup population members were less clustered than -the the referent racial or ethnic subgroup population, then RCL -would be negative.

    -
    denton2018HTX <- denton(
    -  geo_large = 'tract',
    +
    +

    Compute Absolute Centralization (ACE)

    +

    Compute the racial or ethnic ACE values (2013-2017 5-year +ACS) for census block groups within census-designated places of +Connecticut. This metric is based on Duncan, Cuzzort, & Duncan +(1961; LC:60007089) and Massey & Denton +(1988). ACE is a measure of the degree to which racial or +ethnic populations within smaller geographical units are located near +the center of a larger geographical unit. ACO is a measure of +concentration of racial or ethnic populations within smaller +geographical units that are located within larger geographical units. +ACO can range from 0 to 1 and represents the relative amount of +physical space occupied by a racial or ethnic subgroup in a larger +geographical unit. A value of 1 indicates that a racial or ethnic +subgroup has achieved the maximum spatial concentration possible (all +racial or ethnic subgroup members live in the smallest of the smaller +geographical units). A value of 0 indicates the maximum deconcentration +possible (all racial or ethnic subgroup members live in the largest of +the smaller geographical units).

    +

    Note: The original metric used the location of the central business +district (CBD) to compute the metric, but the U.S. Census Bureau has not +defined CBDs for U.S. cities since the 1982 Census of Retail +Trade. Therefore, this function uses the the centroids of each +larger geographical unit as the ‘centre’, but may not represent the +current CBD.

    +
    duncan_cuzzort2017CT <- duncan_cuzzort(
    +  geo_large = 'place',
       geo_small = 'cbg',
    -  state = 'TX',
    -  county = 'Harris County',
    -  year = 2018,
    -  subgroup = 'NHoLB',
    -  subgroup_ref = 'NHoLW'
    -)
    -
    -# Obtain the 2018 census tracts in Harris County, TX, from the 'tigris' package
    -tract2018 <- tracts(year = 2018, state = 'TX')
    -# Obtain the 2018 Texas counties from the 'tigris' package
    -county2018 <- counties(state = 'TX', year = 2018, cb = TRUE)
    -
    -# Join the RCL values to the census tract geometries and filter for Harris County, TX
    -HTX2010denton <- tract2018 %>%
    -  left_join(denton2018HTX$rcl, by = 'GEOID') %>%
    -  filter(!st_is_empty(.)) %>%
    -  filter(!is.na(RCL)) %>%
    -  st_filter(county2018 %>% filter(NAME == 'Harris')) %>%
    -  st_make_valid()
    -
    # Visualize the RCL values (2013-2017 5-year ACS) for census block groups within census tracts of Harris County, TX
    +  state = 'CT',
    +  year = 2017,
    +  subgroup = c('NHoLB', 'HoLB')
    +)
    +
    +# Obtain the 2017 census-designated places in Connecticut from the 'tigris' package
    +places2017 <- places(year = 2017, state = 'CT')
    +# Obtain the 2017 state from the 'tigris' package
    +states2017 <- states(year = 2017, cb = TRUE)
    +
    +# Join the ACE values to the census-designated places geometries and filter for Connecticut
    +CT2010duncan_cuzzort <- places2017 %>%
    +  left_join(duncan_cuzzort2017CT$ace, by = 'GEOID') %>%
    +  filter(!st_is_empty(.)) %>%
    +  filter(!is.na(ACE)) %>%
    +  st_filter(states2017 %>% filter(STUSPS == 'CT')) %>%
    +  st_make_valid()
    +
    # Visualize the ACE values (2013-2017 5-year ACS) for census block groups within census-designated places of Connecticut
     ggplot() +
       geom_sf(
    -    data = HTX2010denton,
    -    aes(fill = RCL)
    +    data = CT2010duncan_cuzzort,
    +    aes(fill = ACE)
       ) +
       geom_sf(
    -    data = county2018 %>% filter(NAME == 'Harris County'),
    +    data = states2017 %>% filter(STUSPS == 'CT'),
         fill = 'transparent',
         color = 'black',
         size = 0.2
    @@ -1419,66 +1427,71 @@ 

    Compute Relative Clustering (RCL)

    low = '#998ec3', mid = '#f7f7f7', high = '#f1a340', - midpoint = 0 - ) + - labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2013-2017 estimates') + - ggtitle( - 'Relative Clustering (Massey & Denton)\nCensus block groups within census tracts of Harris County, TX', - subtitle = 'Black non-Hispanic vs. white non-Hispanic' - )
    -

    + midpoint = 0, + limits = c(-1, 1) + ) + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2013-2017 estimates') + + ggtitle( + 'Absolute Centralization (Duncan & Cuzzort)\nCensus block groups within census-designated places of Connecticut', + subtitle = 'Black population' + )
    +

    -
    -

    Compute Absolute Centralization (ACE)

    -

    Compute the racial or ethnic ACE values (2013-2017 5-year +

    +

    Compute Relative Centralization (RCE)

    +

    Compute the racial or ethnic RCE values (2013-2017 5-year ACS) for census block groups within census-designated places of -Connecticut. This metric is based on Duncan, Cuzzort, & Duncan -(1961; LC:60007089) and Massey & Denton -(1988). ACE is a measure of the degree to which racial or +Connecticut. This metric is based on Duncan & Duncan (1955b) +and Massey & Denton +(1988). RCE is a measure of the degree to which racial or ethnic populations within smaller geographical units are located near -the center of a larger geographical unit. ACE can range in +the center of a larger geographical unit. RCE can range in value from -1 to 1 and represents the spatial distribution of racial or -ethnic populations within smaller geographical units compared to the -distribution of land area around the center of a larger geographical -unit. Positive values indicate a tendency for racial or ethnic -populations to reside close to the center of a larger geographical unit, -while negative values indicate a tendency to live in outlying areas. A -score of 0 means that racial or ethnic populations have a uniform -distribution throughout a larger geographical unit. ACE gives -the proportion of racial or ethnic populations required to change -residence to achieve a uniform distribution of population around the -center of a larger geographical unit.

    +ethnic populations within smaller geographical units relative to the +compared to the distribution of the referent racial or ethnic population +around the center of a larger geographical unit. Positive values +indicate a tendency for racial or ethnic populations to reside closer to +the center of a larger geographical unit than the referent racial or +ethnic population, while negative values indicate the racial or ethnic +population is distributed farther from the center of a larger +geographical unit than the referent racial or ethnic population. A score +of 0 means that racial or ethnic populations have a uniform distribution +throughout a larger geographical unit. RCE gives the proportion +of racial or ethnic populations required to change residence to match +the degree of centralization of the referent racial or ethnic +population.

    Note: The original metric used the location of the central business district (CBD) to compute the metric, but the U.S. Census Bureau has not defined CBDs for U.S. cities since the 1982 Census of Retail Trade. Therefore, this function uses the the centroids of each larger geographical unit as the ‘centre’, but may not represent the current CBD.

    -
    duncan_cuzzort2017CT <- duncan_cuzzort(
    +
    duncan_duncan2017CT <- duncan_duncan(
       geo_large = 'place',
       geo_small = 'cbg',
       state = 'CT',
       year = 2017,
    -  subgroup = c('NHoLB', 'HoLB')
    -)
    -
    -# Obtain the 2017 census-designated places in Connecticut from the 'tigris' package
    -places2017 <- places(year = 2017, state = 'CT')
    -# Obtain the 2017 state from the 'tigris' package
    -states2017 <- states(year = 2017, cb = TRUE)
    -
    -# Join the ACE values to the census-designated places geometries and filter for Connecticut
    -CT2010duncan_cuzzort <- places2017 %>%
    -  left_join(duncan_cuzzort2017CT$ace, by = 'GEOID') %>%
    -  filter(!st_is_empty(.)) %>%
    -  filter(!is.na(ACE)) %>%
    -  st_filter(states2017 %>% filter(STUSPS == 'CT')) %>%
    -  st_make_valid()
    + subgroup = 'NHoLB', + subgroup_ref = 'NHoLW' +) + +# Obtain the 2017 census-designated places in Connecticut from the 'tigris' package +places2017 <- places(year = 2017, state = 'CT') +# Obtain the 2017 state from the 'tigris' package +states2017 <- states(year = 2017, cb = TRUE) + +# Join the ACE values to the census-designated places geometries and filter for Connecticut +CT2010duncan_duncan <- places2017 %>% + left_join(duncan_duncan2017CT$rce, by = 'GEOID') %>% + filter(!st_is_empty(.)) %>% + filter(!is.na(RCE)) %>% + st_filter(states2017 %>% filter(STUSPS == 'CT')) %>% + st_make_valid()
    # Visualize the ACE values (2013-2017 5-year ACS) for census block groups within census-designated places of Connecticut
     ggplot() +
       geom_sf(
    -    data = CT2010duncan_cuzzort,
    -    aes(fill = ACE)
    +    data = CT2010duncan_duncan,
    +    aes(fill = RCE)
       ) +
       geom_sf(
         data = states2017 %>% filter(STUSPS == 'CT'),
    @@ -1496,69 +1509,54 @@ 

    Compute Absolute Centralization (ACE)

    ) + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2013-2017 estimates') + ggtitle( - 'Absolute Centralization (Duncan & Cuzzort)\nCensus block groups within census-designated places of Connecticut', - subtitle = 'Black population' + 'Relative Centralization (Duncan & Duncan)\nCensus block groups within census-designated places of Connecticut', + subtitle = 'Black non-Hispanic vs. white non-Hispanic' )
    -

    +

    -
    -

    Compute Relative Centralization (RCE)

    -

    Compute the racial or ethnic RCE values (2013-2017 5-year -ACS) for census block groups within census-designated places of -Connecticut. This metric is based on Duncan & Duncan (1955b) -and Massey & Denton -(1988). RCE is a measure of the degree to which racial or -ethnic populations within smaller geographical units are located near -the center of a larger geographical unit. RCE can range in -value from -1 to 1 and represents the spatial distribution of racial or -ethnic populations within smaller geographical units relative to the -compared to the distribution of the referent racial or ethnic population -around the center of a larger geographical unit. Positive values -indicate a tendency for racial or ethnic populations to reside closer to -the center of a larger geographical unit than the referent racial or -ethnic population, while negative values indicate the racial or ethnic -population is distributed farther from the center of a larger -geographical unit than the referent racial or ethnic population. A score -of 0 means that racial or ethnic populations have a uniform distribution -throughout a larger geographical unit. RCE gives the proportion -of racial or ethnic populations required to change residence to match -the degree of centralization of the referent racial or ethnic -population.

    -

    Note: The original metric used the location of the central business -district (CBD) to compute the metric, but the U.S. Census Bureau has not -defined CBDs for U.S. cities since the 1982 Census of Retail -Trade. Therefore, this function uses the the centroids of each -larger geographical unit as the ‘centre’, but may not represent the -current CBD.

    -
    duncan_duncan2017CT <- duncan_duncan(
    -  geo_large = 'place',
    +
    +

    Compute Absolute Clustering (ACL)

    +

    Compute the racial or ethnic ACL values (2014-2018 5-year +ACS) for census block groups within census tracts of Harris County, TX. +This metric is based on Massey & Denton +(1988). ACL is a measure of clustering of racial or ethnic +populations within smaller geographical units that are located within +larger geographical units. ACL can range in value from 0 to Inf +and represents the degree to which an area is a racial or ethnic +enclave. A value of 1 indicates there is no differential clustering of +the racial or ethnic subgroup. A value greater than 1 indicates the +racial or ethnic subgroup live nearer to one another. A value less than +1 indicates the racial or ethnic subgroup do not live near one +another.

    +
    massey2018HTX <- massey(
    +  geo_large = 'tract',
       geo_small = 'cbg',
    -  state = 'CT',
    -  year = 2017,
    -  subgroup = 'NHoLB',
    -  subgroup_ref = 'NHoLW'
    +  state = 'TX',
    +  county = 'Harris County',
    +  year = 2018,
    +  subgroup = c('NHoLB', 'HoLB')
     )
     
    -# Obtain the 2017 census-designated places in Connecticut from the 'tigris' package
    -places2017 <- places(year = 2017, state = 'CT')
    -# Obtain the 2017 state from the 'tigris' package
    -states2017 <- states(year = 2017, cb = TRUE)
    +# Obtain the 2018 census tracts in Harris County, TX, from the 'tigris' package
    +tract2018 <- tracts(year = 2018, state = 'TX')
    +# Obtain the 2018 Texas counties from the 'tigris' package
    +county2018 <- counties(state = 'TX', year = 2018, cb = TRUE)
     
    -# Join the ACE values to the census-designated places geometries and filter for Connecticut
    -CT2010duncan_duncan <- places2017 %>%
    -  left_join(duncan_duncan2017CT$rce, by = 'GEOID') %>%
    +# Join the ACL values to the census tract geometries and filter for Harris County, TX
    +HTX2010massey <- tract2018 %>%
    +  left_join(massey2018HTX$acl, by = 'GEOID') %>%
       filter(!st_is_empty(.)) %>%
    -  filter(!is.na(RCE)) %>%
    -  st_filter(states2017 %>% filter(STUSPS == 'CT')) %>%
    +  filter(!is.na(ACL)) %>%
    +  st_filter(county2018 %>% filter(NAME == 'Harris')) %>%
       st_make_valid()
    -
    # Visualize the ACE values (2013-2017 5-year ACS) for census block groups within census-designated places of Connecticut
    +
    # Visualize the ACL values (2013-2017 5-year ACS) for census block groups within census tracts of Harris County, TX
     ggplot() +
       geom_sf(
    -    data = CT2010duncan_duncan,
    -    aes(fill = RCE)
    +    data = HTX2010massey,
    +    aes(fill = ACL)
       ) +
       geom_sf(
    -    data = states2017 %>% filter(STUSPS == 'CT'),
    +    data = county2018 %>% filter(NAME == 'Harris County'),
         fill = 'transparent',
         color = 'black',
         size = 0.2
    @@ -1568,15 +1566,14 @@ 

    Compute Relative Centralization (RCE)

    low = '#998ec3', mid = '#f7f7f7', high = '#f1a340', - midpoint = 0, - limits = c(-1, 1) - ) + - labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2013-2017 estimates') + - ggtitle( - 'Relative Centralization (Duncan & Duncan)\nCensus block groups within census-designated places of Connecticut', - subtitle = 'Black non-Hispanic vs. white non-Hispanic' - )
    -

    + midpoint = 0 + ) + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2013-2017 estimates') + + ggtitle( + 'Absolute Clustering (Massey & Denton)\nCensus block groups within census tracts of Harris County, TX', + subtitle = 'Black population' + )
    +

    Compute an index of spatial proximity (SP)

    @@ -1642,7 +1639,68 @@

    Compute an index of spatial proximity (SP)

    subtitle = 'Black non-Hispanic vs. white non-Hispanic' )

    -
    sessionInfo()
    +
    +
    +

    Compute Relative Clustering (RCL)

    +

    Compute the racial or ethnic RCL values (2014-2018 5-year +ACS) for census block groups within census tracts of Harris County, TX. +This metric is based on Massey & Denton +(1988). RCL equals 0 when the racial or ethnic subgroup +population displays the same amount of clustering as the referent racial +or ethnic subgroup population, and is positive whenever the racial or +ethnic subgroup population members display greater clustering than is +typical of the the referent racial or ethnic subgroup population. If the +racial or ethnic subgroup population members were less clustered than +the the referent racial or ethnic subgroup population, then RCL +would be negative.

    +
    denton2018HTX <- denton(
    +  geo_large = 'tract',
    +  geo_small = 'cbg',
    +  state = 'TX',
    +  county = 'Harris County',
    +  year = 2018,
    +  subgroup = 'NHoLB',
    +  subgroup_ref = 'NHoLW'
    +)
    +
    +# Obtain the 2018 census tracts in Harris County, TX, from the 'tigris' package
    +tract2018 <- tracts(year = 2018, state = 'TX')
    +# Obtain the 2018 Texas counties from the 'tigris' package
    +county2018 <- counties(state = 'TX', year = 2018, cb = TRUE)
    +
    +# Join the RCL values to the census tract geometries and filter for Harris County, TX
    +HTX2010denton <- tract2018 %>%
    +  left_join(denton2018HTX$rcl, by = 'GEOID') %>%
    +  filter(!st_is_empty(.)) %>%
    +  filter(!is.na(RCL)) %>%
    +  st_filter(county2018 %>% filter(NAME == 'Harris')) %>%
    +  st_make_valid()
    +
    # Visualize the RCL values (2013-2017 5-year ACS) for census block groups within census tracts of Harris County, TX
    +ggplot() +
    +  geom_sf(
    +    data = HTX2010denton,
    +    aes(fill = RCL)
    +  ) +
    +  geom_sf(
    +    data = county2018 %>% filter(NAME == 'Harris County'),
    +    fill = 'transparent',
    +    color = 'black',
    +    size = 0.2
    +  ) +
    +  theme_minimal() +
    +  scale_fill_gradient2(
    +    low = '#998ec3', 
    +    mid = '#f7f7f7', 
    +    high = '#f1a340', 
    +    midpoint = 0
    +  ) +
    +  labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2013-2017 estimates') +
    +  ggtitle(
    +    'Relative Clustering (Massey & Denton)\nCensus block groups within census tracts of Harris County, TX',
    +    subtitle = 'Black non-Hispanic vs. white non-Hispanic'
    +  )
    +

    +
    sessionInfo()
    ## R version 4.4.1 (2024-06-14 ucrt)
     ## Platform: x86_64-w64-mingw32/x64
     ## Running under: Windows 10 x64 (build 19045)
    @@ -1664,7 +1722,7 @@ 

    Compute an index of spatial proximity (SP)

    ## [1] stats graphics grDevices utils datasets methods base ## ## other attached packages: -## [1] tigris_2.1 tidycensus_1.6.5 sf_1.0-16 ndi_0.1.6.9010 +## [1] tigris_2.1 tidycensus_1.6.5 sf_1.0-16 ndi_0.1.6.9011 ## [5] ggplot2_3.5.1 dplyr_1.1.4 knitr_1.48 ## ## loaded via a namespace (and not attached):