diff --git a/DESCRIPTION b/DESCRIPTION index a46fad0..6b722fd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ndi Title: Neighborhood Deprivation Indices -Version: 0.1.6.9013 +Version: 0.1.6.9014 Date: 2024-09-02 Authors@R: c(person(given = "Ian D.", diff --git a/NAMESPACE b/NAMESPACE index 9db4c2c..fca523d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,6 +18,7 @@ export(lieberson) export(massey) export(massey_duncan) export(messer) +export(morgan_denton) export(morgan_massey) export(powell_wiley) export(sudano) diff --git a/NEWS.md b/NEWS.md index d94eefd..1eb054d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,6 @@ # ndi (development version) -## ndi v0.1.6.9013 +## ndi v0.1.6.9014 ### New Features @@ -17,10 +17,11 @@ * Added `duncan_duncan()` function to compute the aspatial racial or ethnic Relative Centralization (*RCE*) based on [Duncan & Duncan (1955b)](https://doi.org/10.1086/221609) and [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281) * Added `massey()` function to compute the aspatial racial or ethnic Absolute Clustering (*ACL*) based on [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281) * Added `massey_duncan()` function to compute the aspatial racial or ethnic Absolute Concentration (*ACO*) based on [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281) and Duncan, Cuzzort, & Duncan (1961; LC:60007089) +* Added `morgan_denton()` function to compute the aspatial racial or ethnic Distance-Decay Interaction Index (_DPxy\*_) based on [Morgan (1983)](https://www.jstor.org/stable/20001935) and [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281) * Added `morgan_massey()` function to compute the aspatial racial or ethnic Distance-Decay Isolation Index (_DPxx\*_) based on [Morgan (1983)](https://www.jstor.org/stable/20001935) and [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281) #### New Function Capabilities -* Added `geo_large = 'place'` for census-designated places, `geo_large = 'cbsa'` for 
core-based statistical areas, `geo_large = 'csa'` for combined statistical areas, and `geo_large = 'metro'` for metropolitan divisions as the larger geographical unit in `atkinson()`, `bell()`, `bemanian_beyer()`, `denton()`, `denton_cuzzort()`, `duncan()`, `duncan_cuzzort()`, `duncan_duncan()`, `hoover()`, `james_taeuber()`, `lieberson()`, `massey()`, `massey_duncan()`, `morgan_massey()`, `sudano()`, `theil()`, and `white()`, `white_blau()` functions. +* Added `geo_large = 'place'` for census-designated places, `geo_large = 'cbsa'` for core-based statistical areas, `geo_large = 'csa'` for combined statistical areas, and `geo_large = 'metro'` for metropolitan divisions as the larger geographical unit in `atkinson()`, `bell()`, `bemanian_beyer()`, `denton()`, `denton_cuzzort()`, `duncan()`, `duncan_cuzzort()`, `duncan_duncan()`, `hoover()`, `james_taeuber()`, `lieberson()`, `massey()`, `massey_duncan()`, `morgan_denton()`, `morgan_massey()`, `sudano()`, `theil()`, and `white()`, `white_blau()` functions. * Added census block group computation for `anthopolos()` by specifying `geo == 'cbg'` or `geo == 'block group'` * Added `holder` argument to `atkinson()` function to toggle the computation with or without the Hölder mean. The function can now compute *A* without the Hölder mean. The default is `holder = FALSE`.
* Added `crs` argument to `anthopolos()`, `bravo()`, and `white_blau()` functions to provide spatial projection of the distance-based metrics diff --git a/R/globals.R b/R/globals.R index 07df090..7835876 100644 --- a/R/globals.R +++ b/R/globals.R @@ -283,6 +283,7 @@ globalVariables( 'n_1', 'n_2', 't_cs', - 'DPxx_star' + 'DPxx_star', + 'DPxy_star' ) ) diff --git a/R/morgan_denton.R b/R/morgan_denton.R new file mode 100644 index 0000000..7010de6 --- /dev/null +++ b/R/morgan_denton.R @@ -0,0 +1,450 @@ +#' Distance-Decay Interaction Index based on Morgan (1983) and Massey & Denton (1988) +#' +#' Compute the aspatial Distance-Decay Interaction Index (Morgan) of a selected racial or ethnic subgroup(s) and U.S. geographies. +#' +#' @param geo_large Character string specifying the larger geographical unit of the data. The default is counties \code{geo_large = 'county'}. +#' @param geo_small Character string specifying the smaller geographical unit of the data. The default is census tracts \code{geo_small = 'tract'}. +#' @param year Numeric. The year to compute the estimate. The default is 2020, and the years 2009 onward are currently available. +#' @param subgroup Character string specifying the racial or ethnic subgroup(s) as the comparison population. See Details for available choices. +#' @param subgroup_ixn Character string specifying the racial or ethnic subgroup(s) as the interaction population. If the same as \code{subgroup}, will compute the simple isolation of the group. See Details for available choices. +#' @param crs Numeric or character string specifying the coordinate reference system to compute the distance-based metric. The default is Albers North America \code{crs = 'ESRI:102008'}. +#' @param omit_NAs Logical. If FALSE, will compute index for a larger geographical unit only if all of its smaller geographical units have values. The default is TRUE. +#' @param quiet Logical. If TRUE, will suppress messages about potential missing census information.
The default is FALSE. +#' @param ... Arguments passed to \code{\link[tidycensus]{get_acs}} to select state, county, and other arguments for census characteristics +#' +#' @details This function will compute the aspatial Distance-Decay Interaction Index (_DPxy\*_) of selected racial or ethnic subgroups and U.S. geographies for a specified geographical extent (e.g., the entire U.S. or a single state) based on Morgan (1983) \url{https://www.jstor.org/stable/20001935} and Massey & Denton (1988) \doi{10.1093/sf/67.2.281}. This function provides the computation of _DPxy\*_ for any of the U.S. Census Bureau race or ethnicity subgroups (including Hispanic and non-Hispanic individuals). +#' +#' The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the computation. The yearly estimates are available for 2009 onward when ACS-5 data are available (2010 onward for \code{geo_large = 'cbsa'} and 2011 onward for \code{geo_large = 'place'}, \code{geo_large = 'csa'}, or \code{geo_large = 'metro'}) but may be available from other U.S. Census Bureau surveys. The twenty racial or ethnic subgroups (U.S.
Census Bureau definitions) are: +#' \itemize{ +#' \item \strong{B03002_002}: not Hispanic or Latino \code{'NHoL'} +#' \item \strong{B03002_003}: not Hispanic or Latino, white alone \code{'NHoLW'} +#' \item \strong{B03002_004}: not Hispanic or Latino, Black or African American alone \code{'NHoLB'} +#' \item \strong{B03002_005}: not Hispanic or Latino, American Indian and Alaska Native alone \code{'NHoLAIAN'} +#' \item \strong{B03002_006}: not Hispanic or Latino, Asian alone \code{'NHoLA'} +#' \item \strong{B03002_007}: not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'NHoLNHOPI'} +#' \item \strong{B03002_008}: not Hispanic or Latino, Some other race alone \code{'NHoLSOR'} +#' \item \strong{B03002_009}: not Hispanic or Latino, Two or more races \code{'NHoLTOMR'} +#' \item \strong{B03002_010}: not Hispanic or Latino, Two races including Some other race \code{'NHoLTRiSOR'} +#' \item \strong{B03002_011}: not Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'NHoLTReSOR'} +#' \item \strong{B03002_012}: Hispanic or Latino \code{'HoL'} +#' \item \strong{B03002_013}: Hispanic or Latino, white alone \code{'HoLW'} +#' \item \strong{B03002_014}: Hispanic or Latino, Black or African American alone \code{'HoLB'} +#' \item \strong{B03002_015}: Hispanic or Latino, American Indian and Alaska Native alone \code{'HoLAIAN'} +#' \item \strong{B03002_016}: Hispanic or Latino, Asian alone \code{'HoLA'} +#' \item \strong{B03002_017}: Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'HoLNHOPI'} +#' \item \strong{B03002_018}: Hispanic or Latino, Some other race alone \code{'HoLSOR'} +#' \item \strong{B03002_019}: Hispanic or Latino, Two or more races \code{'HoLTOMR'} +#' \item \strong{B03002_020}: Hispanic or Latino, Two races including Some other race \code{'HoLTRiSOR'} +#' \item \strong{B03002_021}: Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'HoLTReSOR'} 
+#' } +#' +#' Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. +#' +#' _DPxy\*_ is a measure of clustering of racial or ethnic populations within smaller geographical units that are located within larger geographical units. _DPxy\*_ is some measure of the probability that a member of a racial or ethnic subgroup will meet or interact with a member of another racial or ethnic subgroup(s). _DPxy\*_ can range in value from 0 to 1 with higher values signifying higher probability of interaction. +#' +#' The metric uses the exponential transform of a distance matrix (kilometers) between smaller geographical area centroids, with a diagonal defined as \code{(0.6*a_{i})^{0.5}} where \code{a_{i}} is the area (square kilometers) of smaller geographical unit \code{i} as defined by White (1983) \doi{10.1086/227768}. +#' +#' Larger geographical units available include states \code{geo_large = 'state'}, counties \code{geo_large = 'county'}, census tracts \code{geo_large = 'tract'}, census-designated places \code{geo_large = 'place'}, core-based statistical areas \code{geo_large = 'cbsa'}, combined statistical areas \code{geo_large = 'csa'}, and metropolitan divisions \code{geo_large = 'metro'}. Smaller geographical units available include counties \code{geo_small = 'county'}, census tracts \code{geo_small = 'tract'}, and census block groups \code{geo_small = 'cbg'}. If a larger geographical unit is comprised of only one smaller geographical unit (e.g., a U.S. county contains only one census tract), then the _DPxy\*_ value returned is NA.
If the larger geographical unit is census-designated places \code{geo_large = 'place'}, core-based statistical areas \code{geo_large = 'cbsa'}, combined statistical areas \code{geo_large = 'csa'}, or metropolitan divisions \code{geo_large = 'metro'}, only the smaller geographical units completely within a larger geographical unit are considered in the _DPxy\*_ computation (see internal \code{\link[sf]{st_within}} function for more information). We recommend specifying all states within which the larger geographical unit of interest is located using the internal \code{state} argument to ensure all appropriate smaller geographical units are included in the _DPxy\*_ computation. +#' +#' @return An object of class 'list'. This is a named list with the following components: +#' +#' \describe{ +#' \item{\code{dpxy_star}}{An object of class 'tbl' for the GEOID, name, and _DPxy\*_ at specified larger census geographies.} +#' \item{\code{dpxy_star_data}}{An object of class 'tbl' for the raw census values at specified smaller census geographies.} +#' \item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute _DPxy\*_.} +#' } +#' +#' @import dplyr +#' @importFrom sf st_centroid st_distance st_drop_geometry st_transform st_within +#' @importFrom stats complete.cases +#' @importFrom stringr str_trim +#' @importFrom tidycensus get_acs +#' @importFrom tidyr pivot_longer separate +#' @importFrom tigris combined_statistical_areas core_based_statistical_areas metro_divisions places +#' @importFrom units drop_units set_units +#' @importFrom utils stack +#' @export +#' +#' @seealso \code{\link[tidycensus]{get_acs}} for additional arguments for geographic extent selection (i.e., \code{state} and \code{county}). +#' +#' @examples +#' \dontrun{ +#' # Wrapped in \dontrun{} because these examples require a Census API key.
+#' +#' # Distance-Decay Interaction Index (a measure of clustering) +#' ## of non-Hispanic Black vs. non-Hispanic white populations +#' ## in census tracts within counties of Georgia, U.S.A. (2020) +#' morgan_denton( +#' geo_large = 'county', +#' geo_small = 'tract', +#' state = 'GA', +#' year = 2020, +#' subgroup = 'NHoLB', +#' subgroup_ixn = 'NHoLW' +#' ) +#' +#' } +#' +morgan_denton <- function(geo_large = 'county', + geo_small = 'tract', + year = 2020, + subgroup, + subgroup_ixn, + crs = 'ESRI:102008', + omit_NAs = TRUE, + quiet = FALSE, + ...) { + + # Check arguments + match.arg(geo_large, choices = c('state', 'county', 'tract', 'place', 'cbsa', 'csa', 'metro')) + match.arg(geo_small, choices = c('county', 'tract', 'cbg', 'block group')) + stopifnot(is.numeric(year), year >= 2009) # all variables available 2009 onward + match.arg( + subgroup, + several.ok = TRUE, + choices = c( + 'NHoL', + 'NHoLW', + 'NHoLB', + 'NHoLAIAN', + 'NHoLA', + 'NHoLNHOPI', + 'NHoLSOR', + 'NHoLTOMR', + 'NHoLTRiSOR', + 'NHoLTReSOR', + 'HoL', + 'HoLW', + 'HoLB', + 'HoLAIAN', + 'HoLA', + 'HoLNHOPI', + 'HoLSOR', + 'HoLTOMR', + 'HoLTRiSOR', + 'HoLTReSOR' + ) + ) + match.arg( + subgroup_ixn, + several.ok = TRUE, + choices = c( + 'NHoL', + 'NHoLW', + 'NHoLB', + 'NHoLAIAN', + 'NHoLA', + 'NHoLNHOPI', + 'NHoLSOR', + 'NHoLTOMR', + 'NHoLTRiSOR', + 'NHoLTReSOR', + 'HoL', + 'HoLW', + 'HoLB', + 'HoLAIAN', + 'HoLA', + 'HoLNHOPI', + 'HoLSOR', + 'HoLTOMR', + 'HoLTRiSOR', + 'HoLTReSOR' + ) + ) + + # Select census variables + vars <- c( + TotalPop = 'B03002_001', + NHoL = 'B03002_002', + NHoLW = 'B03002_003', + NHoLB = 'B03002_004', + NHoLAIAN = 'B03002_005', + NHoLA = 'B03002_006', + NHoLNHOPI = 'B03002_007', + NHoLSOR = 'B03002_008', + NHoLTOMR = 'B03002_009', + NHoLTRiSOR = 'B03002_010', + NHoLTReSOR = 'B03002_011', + HoL = 'B03002_012', + HoLW = 'B03002_013', + HoLB = 'B03002_014', + HoLAIAN = 'B03002_015', + HoLA = 'B03002_016', + HoLNHOPI = 'B03002_017', + HoLSOR = 'B03002_018', + HoLTOMR = 
'B03002_019', + HoLTRiSOR = 'B03002_020', + HoLTReSOR = 'B03002_021' + ) + + selected_vars <- vars[c('TotalPop', subgroup, subgroup_ixn)] + out_names <- c(names(selected_vars), 'ALAND') # save for output + in_subgroup <- paste0(subgroup, 'E') + in_subgroup_ixn <- paste0(subgroup_ixn, 'E') + + # Acquire DPxy_star variables and sf geometries + out_dat <- suppressMessages(suppressWarnings( + tidycensus::get_acs( + geography = geo_small, + year = year, + output = 'wide', + variables = selected_vars, + geometry = TRUE, + keep_geo_vars = TRUE, + ... + ) + )) + + # Format output + if (geo_small == 'county') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('county', 'state'), sep = ',') + } + if (geo_small == 'tract') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('tract', 'county', 'state'), sep = ',') %>% + dplyr::mutate(tract = gsub('[^0-9\\.]', '', tract)) + } + if (geo_small == 'cbg' | geo_small == 'block group') { + out_dat <- out_dat %>% + tidyr::separate(NAME.y, into = c('cbg', 'tract', 'county', 'state'), sep = ',') %>% + dplyr::mutate( + tract = gsub('[^0-9\\.]', '', tract), + cbg = gsub('[^0-9\\.]', '', cbg) + ) + } + + # Grouping IDs for DPxy_star computation + if (geo_large == 'state') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = STATEFP, + state = stringr::str_trim(state) + ) + } + if (geo_large == 'tract') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = paste0(STATEFP, COUNTYFP, TRACTCE), + state = stringr::str_trim(state), + county = stringr::str_trim(county) + ) + } + if (geo_large == 'county') { + out_dat <- out_dat %>% + dplyr::mutate( + oid = paste0(STATEFP, COUNTYFP), + state = stringr::str_trim(state), + county = stringr::str_trim(county) + ) + } + if (geo_large == 'place') { + stopifnot(is.numeric(year), year >= 2011) # Places only available 2011 onward + lgeom <- suppressMessages(suppressWarnings(tigris::places( + year = year, state = unique(out_dat$state)) + )) + wlgeom <- sf::st_within(out_dat, lgeom) + 
out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 4] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + place = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 5] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) + } + if (geo_large == 'cbsa') { + stopifnot(is.numeric(year), year >= 2010) # CBSAs only available 2010 onward + lgeom <- suppressMessages(suppressWarnings(tigris::core_based_statistical_areas(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 3] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + cbsa = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 4] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) + } + if (geo_large == 'csa') { + stopifnot(is.numeric(year), year >= 2011) # CSAs only available 2011 onward + lgeom <- suppressMessages(suppressWarnings(tigris::combined_statistical_areas(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 2] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + csa = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 3] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) + } + if (geo_large == 'metro') { + stopifnot(is.numeric(year), year >= 2011) # Metropolitan Divisions only available 2011 onward + lgeom <- suppressMessages(suppressWarnings(tigris::metro_divisions(year = year))) + wlgeom <- sf::st_within(out_dat, lgeom) + out_dat <- out_dat %>% + dplyr::mutate( + oid = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 4] %>% 
sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist(), + metro = lapply(wlgeom, function(x) { + tmp <- lgeom[x, 5] %>% sf::st_drop_geometry() + lapply(tmp, function(x) { if (length(x) == 0) NA else x }) + }) %>% + unlist() + ) + } + + # Count of racial or ethnic subgroup populations + ## Count of racial or ethnic comparison subgroup population + if (length(in_subgroup) == 1) { + out_dat <- out_dat %>% + dplyr::mutate(subgroup = as.data.frame(.)[, in_subgroup]) + } else { + out_dat <- out_dat %>% + dplyr::mutate(subgroup = rowSums(as.data.frame(.)[, in_subgroup])) + } + ## Count of racial or ethnic interaction subgroup population + if (length(in_subgroup_ixn) == 1) { + out_dat <- out_dat %>% + dplyr::mutate(subgroup_ixn = as.data.frame(.)[, in_subgroup_ixn]) + } else { + out_dat <- out_dat %>% + dplyr::mutate(subgroup_ixn = rowSums(as.data.frame(.)[, in_subgroup_ixn])) + } + + # Compute DPxy* + ## From Massey & Denton (1988) https://doi.org/10.1093/sf/67.2.281 + ## DP_{xy}^{*}=\sum_{i=1}^{n}\frac{x_{i}}{X}\sum_{j=1}^{n}\frac{K_{ij}y_{j}}{t_{j}} + ## Where for i & j smaller geographical units: + ## x_{i} denotes the racial or ethnic subgroup population of smaller geographical unit i + ## X denotes the racial or ethnic subgroup population of a larger geographical unit + ## y_{j} denotes the interaction racial or ethnic subgroup population of smaller geographical unit j + ## t_{j} denotes the total population of smaller geographical unit j + ## and + ## K_{ij} = \frac{exp(-d_{ij})t_{j}}{\sum_{i=1}^{n}exp(-d_{ij})t_{j}} + + ## Compute + out_tmp <- out_dat %>% + .[.$oid != 'NANA', ] %>% + split(., f = list(.$oid)) %>% + lapply(., FUN = dpxy_star_fun, crs = crs, omit_NAs = omit_NAs) %>% + utils::stack(.)
%>% + dplyr::mutate( + DPxy_star = values, + oid = ind + ) %>% + dplyr::select(DPxy_star, oid) %>% + sf::st_drop_geometry() + + # Warning for missingness of census characteristics + missingYN <- out_dat[, c('TotalPopE', in_subgroup, in_subgroup_ixn, 'ALAND')] %>% + sf::st_drop_geometry() + names(missingYN) <- out_names + missingYN <- missingYN %>% + tidyr::pivot_longer( + cols = dplyr::everything(), + names_to = 'variable', + values_to = 'val' + ) %>% + dplyr::group_by(variable) %>% + dplyr::summarise( + total = dplyr::n(), + n_missing = sum(is.na(val)), + percent_missing = paste0(round(mean(is.na(val)) * 100, 2), ' %') + ) + + if (quiet == FALSE) { + # Warning for missing census data + if (sum(missingYN$n_missing) > 0) { + message('Warning: Missing census data') + } + } + + # Format output + out <- out_dat %>% + sf::st_drop_geometry() %>% + dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) + if (geo_large == 'state') { + out <- out %>% + dplyr::select(oid, state, DPxy_star) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, state, DPxy_star) + } + if (geo_large == 'county') { + out <- out %>% + dplyr::select(oid, state, county, DPxy_star) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, state, county, DPxy_star) + } + if (geo_large == 'tract') { + out <- out %>% + dplyr::select(oid, state, county, tract, DPxy_star) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, state, county, tract, DPxy_star) + } + if (geo_large == 'place') { + out <- out %>% + dplyr::select(oid, place, DPxy_star) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, place, DPxy_star) + } + if (geo_large == 'cbsa') { + out <- out %>% + dplyr::select(oid, cbsa, DPxy_star) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, cbsa, DPxy_star) + } + if (geo_large == 'csa') { + out <- out %>% + dplyr::select(oid, csa, DPxy_star) %>% + unique(.) 
%>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, csa, DPxy_star) + } + if (geo_large == 'metro') { + out <- out %>% + dplyr::select(oid, metro, DPxy_star) %>% + unique(.) %>% + dplyr::mutate(GEOID = oid) %>% + dplyr::select(GEOID, metro, DPxy_star) + } + + out <- out %>% + .[.$GEOID != 'NANA', ] %>% + dplyr::filter(!is.na(GEOID)) %>% + dplyr::distinct(GEOID, .keep_all = TRUE) %>% + dplyr::arrange(GEOID) %>% + dplyr::as_tibble() + + out_dat <- out_dat %>% + dplyr::arrange(GEOID) %>% + dplyr::as_tibble() + + out <- list(dpxy_star = out, dpxy_star_data = out_dat, missing = missingYN) + + return(out) +} diff --git a/R/morgan_massey.R b/R/morgan_massey.R index d588a20..8066948 100644 --- a/R/morgan_massey.R +++ b/R/morgan_massey.R @@ -39,7 +39,7 @@ #' #' Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. #' -#' _DPxx\*_ is a measure of clustering of racial or ethnic populations within smaller geographical units that are located within larger geographical units. _DPxx\*_ is some measure of the probability that a member of one subgroup(s) will meet or interact with a member of another subgroup(s). _DPxx\*_ can range in value from 0 to 1 with higher values signifying higher probability of isolation (less isolation). +#' _DPxx\*_ is a measure of clustering of racial or ethnic populations within smaller geographical units that are located within larger geographical units. _DPxx\*_ is some measure of the probability that a member of one racial or ethnic subgroup will meet or interact with a member of the same racial or ethnic subgroup. _DPxx\*_ can range in value from 0 to 1 with higher values signifying higher probability of isolation (less isolation). 
#' #' The metric uses the exponential transform of a distance matrix (kilometers) between smaller geographical area centroids, with a diagonal defined as \code{(0.6*a_{i})^{0.5}} where \code{a_{i}} is the area (square kilometers) of smaller geographical unit \code{i} as defined by White (1983) \doi{10.1086/227768}. #' @@ -293,7 +293,7 @@ morgan_massey <- function(geo_large = 'county', dplyr::mutate(subgroup = rowSums(as.data.frame(.)[, in_subgroup])) } - # Compute DPxx_star + # Compute DPxx* ## From Massey & Denton (1988) https://doi.org/10.1093/sf/67.2.281 ## DP_{xx}^{*}=\sum_{i=1}^{n}\frac{x_{i}}{X}\sum_{j=1}^{n}\frac{K_{ij}x_{j}}{t_{j}} ## Where for i & j smaller geographical units: diff --git a/R/ndi-package.R b/R/ndi-package.R index e7e45ef..168e207 100644 --- a/R/ndi-package.R +++ b/R/ndi-package.R @@ -44,6 +44,8 @@ #' #' \code{\link{massey_duncan}} Computes the aspatial Absolute Concentration (\emph{ACO}) based on Massey & Denton (1988) \doi{10.1093/sf/67.2.281} and Duncan, Cuzzort, & Duncan (1961; LC:60007089). #' +#' \code{\link{morgan_denton}} Computes the aspatial Distance-Decay Interaction Index (\emph{DPxy\*}) based on Morgan (1983) \url{https://www.jstor.org/stable/20001935} and Massey & Denton (1988) \doi{10.1093/sf/67.2.281}. +#' #' \code{\link{morgan_massey}} Computes the aspatial Distance-Decay Isolation Index (\emph{DPxx\*}) based on Morgan (1986) \url{https://www.jstor.org/stable/20001935} and Massey & Denton (1988) \doi{10.1093/sf/67.2.281}. #' #' \code{\link{sudano}} Computes the aspatial Location Quotient (\emph{LQ}) based on Merton (1939) \doi{10.2307/2084686} and Sudano et al. (2013) \doi{10.1016/j.healthplace.2012.09.015}.
diff --git a/R/utils.R b/R/utils.R index 21c8770..8313efa 100644 --- a/R/utils.R +++ b/R/utils.R @@ -221,7 +221,35 @@ djt_fun <- function(x, omit_NAs) { } } -# Internal function for Distance Decay Isolation +# Internal function for Distance-Decay Interaction Index +## From Massey & Denton (1988) https://doi.org/10.1093/sf/67.2.281 +## Returns NA value if only one smaller geography with population in a larger geography +dpxy_star_fun <- function(x, crs, omit_NAs) { + xx <- x %>% + dplyr::select(TotalPopE, subgroup, subgroup_ixn, ALAND) %>% + dplyr::filter(TotalPopE > 0) + if (omit_NAs == TRUE) { xx <- xx[stats::complete.cases(sf::st_drop_geometry(xx)), ] } + if (nrow(sf::st_drop_geometry(xx)) < 2 || any(sf::st_drop_geometry(xx) < 0) || any(is.na(sf::st_drop_geometry(xx)))) { + NA + } else { + xx <- xx %>% sf::st_transform(crs = crs) + x_i <- x_j <- xx$subgroup + X <- sum(x_i, na.rm = TRUE) + y_j <- xx$subgroup_ixn + t_j <- xx$TotalPopE + d_ij <- suppressWarnings(sf::st_distance(sf::st_centroid(xx), sf::st_centroid(xx))) + diag(d_ij) <- sqrt(0.6 * xx$ALAND) + c_ij <- -d_ij %>% + units::set_units(value = km) %>% + units::drop_units() %>% + exp() + K_ij <- c_ij * t_j / sum(c_ij * t_j, na.rm = TRUE) + DPxx_star <- sum(x_i / X, na.rm = TRUE) * sum(K_ij * y_j / t_j, na.rm = TRUE) + return(DPxx_star) + } +} + +# Internal function for Distance-Decay Isolation Index ## From Massey & Denton (1988) https://doi.org/10.1093/sf/67.2.281 ## Returns NA value if only one smaller geography with population in a larger geography dpxx_star_fun <- function(x, crs, omit_NAs) { diff --git a/README.md b/README.md index f4a9bd4..979f8bd 100644 --- a/README.md +++ b/README.md @@ -115,6 +115,10 @@ To install the development version from GitHub: Compute the aspatial Neighborhood Deprivation Index (NDI) based on Messer et al. 
(2006) +morgan_denton +Compute the aspatial racial or ethnic Distance-Decay Interaction Index (DPxy*) based on Morgan (1983) and Massey & Denton (1988) + + morgan_massey Compute the aspatial racial or ethnic Distance-Decay Isolation Index (DPxx*) based on Morgan (1983) and Massey & Denton (1988) @@ -1356,7 +1360,7 @@ ggplot() + # Compute aspatial racial or ethnic Isolation Index (Lieberson) # # ------------------------------------------------------------- # -# Interaction Index based on Lieberson (1981) and Bell (1954) +# Isolation Index based on Lieberson (1981) and Bell (1954) ## Selected subgroup: Not Hispanic or Latino, Black or African American alone ## Selected large geography: census tract ## Selected small geography: census block group @@ -1492,6 +1496,53 @@ ggsave(file.path('man', 'figures', 'aco.png'), height = 7, width = 7) ![](man/figures/aco.png) +```r +# --------------------------------------------------------------------------- # +# Compute aspatial racial or ethnic Distance-Decay Interaction Index (Morgan) # +# --------------------------------------------------------------------------- # + +# Distance-Decay Interaction Index based on Morgan (1983) and Massey & Denton (1988) +## Selected subgroup: Not Hispanic or Latino, Black or African American alone +## Selected interaction subgroup: Not Hispanic or Latino, white alone +## Selected large geography: census tract +## Selected small geography: census block group +DPxy_star_2020_DC <- morgan_denton( + geo_large = 'tract', + geo_small = 'cbg', + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + subgroup_ixn = 'NHoLW' +) + +# Obtain the 2020 census tracts from the 'tigris' package +tract_2020_DC <- tracts(state = 'DC', year = 2020, cb = TRUE) + +# Join the DPxy* (Morgan) values to the census tract geometry +DPxy_star_2020_DC <- tract_2020_DC %>% + left_join(DPxy_star_2020_DC$dpxy_star, by = 'GEOID') + +ggplot() + + geom_sf( + data = DPxy_star_2020_DC, + aes(fill = DPxy_star), +
color = 'white' + ) + + theme_bw() + + scale_fill_viridis_c(limits = c(0, 1)) + + labs( + fill = 'Index (Continuous)', + caption = 'Source: U.S. Census ACS 2016-2020 estimates' + ) + + ggtitle( + 'Distance-Decay Interaction Index (Morgan)\nCensus block groups within tracts of Washington, D.C.', + subtitle = 'Black non-Hispanic vs. white non-Hispanic' + ) +ggsave(file.path('man', 'figures', 'dpxy_star.png'), height = 7, width = 7) +``` + +![](man/figures/dpxy_star.png) + ```r # ------------------------------------------------------------------------- # # Compute aspatial racial or ethnic Distance-Decay Isolation Index (Morgan) # diff --git a/cran-comments.md b/cran-comments.md index 4cb6d7f..bffe492 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -10,10 +10,11 @@ * Added `lieberson()` function to compute the aspatial racial or ethnic Isolation Index (_xPx\*_) based on Lieberson (1981; ISBN-13:978-1-032-53884-6) and and [Bell (1954)](https://doi.org/10.2307/2574118) * Added `massey()` function to compute the aspatial racial or ethnic Absolute Clustering (*ACL*) based on [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281) * Added `massey_duncan()` function to compute the aspatial racial or ethnic Absolute Concentration (*ACO*) based on [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281) and Duncan, Cuzzort, & Duncan (1961; LC:60007089) + * Added `morgan_denton()` function to compute the aspatial racial or ethnic Distance-Decay Interaction Index (_DPxy\*_) based on [Morgan (1983)](https://www.jstor.org/stable/20001935) and [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281) * Added `morgan_massey()` function to compute the aspatial racial or ethnic Distance-Decay Isolation Index (_DPxx\*_) based on [Morgan (1983)](https://www.jstor.org/stable/20001935) and [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281) * Added `theil()` function the aspatial racial or ethnic Entropy (*H*) based on Theil (1972; ISBN:978-0-444-10378-9) 
and [Theil & Finizza (1971)](https://doi.org/110.1080/0022250X.1971.9989795) * Added `white_blau()` function to compute an index of spatial proximity (*SP*) based on [White (1986)](https://doi.org/10.2307/3644339) and Blau (1977; ISBN-13:978-0-029-03660-0) - * Added `geo_large = 'place'` for census-designated places, `geo_large = 'cbsa'` for core-based statistical areas, `geo_large = 'csa'` for combined statistical areas, and `geo_large = 'metro'` for metropolitan divisions as the larger geographical unit in `atkinson()`, `bell()`, `bemanian_beyer()`, `denton()`, `denton_cuzzort()`, `duncan()`, `duncan_cuzzort()`, `duncan_duncan()`, `hoover()`, `james_taeuber()`, `lieberson()`, `massey()`, `massey_duncan()`, `morgan_massey()`, `sudano()`, `theil()`, and `white()`, `white_blau()` functions. + * Added `geo_large = 'place'` for census-designated places, `geo_large = 'cbsa'` for core-based statistical areas, `geo_large = 'csa'` for combined statistical areas, and `geo_large = 'metro'` for metropolitan divisions as the larger geographical unit in `atkinson()`, `bell()`, `bemanian_beyer()`, `denton()`, `denton_cuzzort()`, `duncan()`, `duncan_cuzzort()`, `duncan_duncan()`, `hoover()`, `james_taeuber()`, `lieberson()`, `massey()`, `massey_duncan()`, `morgan_denton()`, `morgan_massey()`, `sudano()`, `theil()`, and `white()`, `white_blau()` functions. * Added census block group computation for `anthopolos()` by specifying `geo == 'cbg'` or `geo == 'block group'` * Added `holder` argument to `atkinson()` function to toggle the computation with or without the Hölder mean. The function can now compute *A* without the Hölder mean. The default is `holder = FALSE`. 
* Added `crs` argument to `anthopolos()`, `bravo()`, and `white_blau()` functions to provide spatial projection of the distance-based metrics diff --git a/inst/CITATION b/inst/CITATION index 4ae6133..14549ea 100755 --- a/inst/CITATION +++ b/inst/CITATION @@ -3,7 +3,7 @@ bibentry(bibtype = 'manual', author = as.person('Ian D. Buller'), publisher = 'The Comprehensive R Archive Network', year = '2024', - number = '0.1.6.9013.', + number = '0.1.6.9014.', doi = '10.5281/zenodo.6989030', url = 'https://cran.r-project.org/package=ndi', @@ -11,7 +11,7 @@ bibentry(bibtype = 'manual', paste('Ian D. Buller (2024).', 'ndi: Neighborhood Deprivation Indices.', 'The Comprehensive R Archive Network.', - 'v0.1.6.9013.', + 'v0.1.6.9014.', 'DOI:10.5281/zenodo.6989030', 'Accessed by: https://cran.r-project.org/package=ndi'), @@ -460,6 +460,45 @@ bibentry(bibtype = 'Book', header = 'And (2):' ) +bibentry(bibtype = 'Article', + title = 'A Distance-Decay Based Interaction Index to Measure Residential Segregation', + author = as.person('Barrie S. Morgan'), + journal = 'Area', + year = '1983', + volume = '15', + issue = '4', + pages = '211--217', + url = 'https://www.jstor.org/stable/20001935', + + textVersion = + paste('Barrie S. Morgan (1983).', + 'A Distance-Decay Based Interaction Index to Measure Residential Segregation.', + 'Area, 15(4), 211-217.', + 'https://www.jstor.org/stable/20001935'), + + header = 'If you computed DPxy* (Morgan) values, please also cite (1):' +) + +bibentry(bibtype = 'Article', + title = 'The Dimensions of Residential Segregation', + author = c(as.person('Douglas S. Massey'), + as.person('Nancy A. Denton')), + journal = 'Social Forces', + year = '1988', + volume = '67', + issue = '2', + pages = '281--315', + doi = '10.1093/sf/67.2.281', + + textVersion = + paste('Douglas S. Massey & Nancy A. 
Denton (1988).', + 'The Dimensions of Residential Segregation.', + 'Social Forces, 67(2), 281-315.', + 'DOI:10.1093/sf/67.2.281'), + + header = 'And (2):' +) + bibentry(bibtype = 'Article', title = 'A Distance-Decay Based Interaction Index to Measure Residential Segregation', author = as.person('Barrie S. Morgan'), diff --git a/man/figures/dpxy_star.png b/man/figures/dpxy_star.png new file mode 100644 index 0000000..2b8c7fd Binary files /dev/null and b/man/figures/dpxy_star.png differ diff --git a/man/morgan_denton.Rd b/man/morgan_denton.Rd new file mode 100644 index 0000000..d2017ed --- /dev/null +++ b/man/morgan_denton.Rd @@ -0,0 +1,106 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/morgan_denton.R +\name{morgan_denton} +\alias{morgan_denton} +\title{Distance-Decay Interaction Index based on Morgan (1983) and Massey & Denton (1988)} +\usage{ +morgan_denton( + geo_large = "county", + geo_small = "tract", + year = 2020, + subgroup, + subgroup_ixn, + crs = "ESRI:102008", + omit_NAs = TRUE, + quiet = FALSE, + ... +) +} +\arguments{ +\item{geo_large}{Character string specifying the larger geographical unit of the data. The default is counties \code{geo_large = 'county'}.} + +\item{geo_small}{Character string specifying the smaller geographical unit of the data. The default is census tracts \code{geo_small = 'tract'}.} + +\item{year}{Numeric. The year to compute the estimate. The default is 2020, and the years 2009 onward are currently available.} + +\item{subgroup}{Character string specifying the racial or ethnic subgroup(s) as the comparison population. See Details for available choices.} + +\item{subgroup_ixn}{Character string specifying the racial or ethnic subgroup(s) as the interaction population. If the same as \code{subgroup}, will compute the simple isolation of the group.
See Details for available choices.} + +\item{crs}{Numeric or character string specifying the coordinate reference system to compute the distance-based metric. The default is Albers North America \code{crs = 'ESRI:102008'}.} + +\item{omit_NAs}{Logical. If FALSE, will compute index for a larger geographical unit only if all of its smaller geographical units have values. The default is TRUE.} + +\item{quiet}{Logical. If TRUE, will display messages about potential missing census information. The default is FALSE.} + +\item{...}{Arguments passed to \code{\link[tidycensus]{get_acs}} to select state, county, and other arguments for census characteristics} +} +\value{ +An object of class 'list'. This is a named list with the following components: + +\describe{ +\item{\code{dpxy_star}}{An object of class 'tbl' for the GEOID, name, and \emph{DPxy\*} at specified larger census geographies.} +\item{\code{dpxy_star_data}}{An object of class 'tbl' for the raw census values at specified smaller census geographies.} +\item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute \emph{DPxy\*}.} +} +} +\description{ +Compute the aspatial Distance-Decay Interaction Index (Morgan) of a selected racial or ethnic subgroup(s) and U.S. geographies. +} +\details{ +This function will compute the aspatial Distance-Decay Interaction Index (\emph{DPxy\*}) of selected racial or ethnic subgroups and U.S. geographies for a specified geographical extent (e.g., the entire U.S. or a single state) based on Morgan (1983) \url{https://www.jstor.org/stable/20001935} and Massey & Denton (1988) \doi{10.1093/sf/67.2.281}. This function provides the computation of \emph{DPxy\*} for any of the U.S. Census Bureau race or ethnicity subgroups (including Hispanic and non-Hispanic individuals). + +The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S.
Census Bureau 5-year American Community Survey characteristics used for the computation. The yearly estimates are available for 2009 onward when ACS-5 data are available (2010 onward for \code{geo_large = 'cbsa'} and 2011 onward for \code{geo_large = 'place'}, \code{geo_large = 'csa'}, or \code{geo_large = 'metro'}) but may be available from other U.S. Census Bureau surveys. The twenty racial or ethnic subgroups (U.S. Census Bureau definitions) are: +\itemize{ +\item \strong{B03002_002}: not Hispanic or Latino \code{'NHoL'} +\item \strong{B03002_003}: not Hispanic or Latino, white alone \code{'NHoLW'} +\item \strong{B03002_004}: not Hispanic or Latino, Black or African American alone \code{'NHoLB'} +\item \strong{B03002_005}: not Hispanic or Latino, American Indian and Alaska Native alone \code{'NHoLAIAN'} +\item \strong{B03002_006}: not Hispanic or Latino, Asian alone \code{'NHoLA'} +\item \strong{B03002_007}: not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'NHoLNHOPI'} +\item \strong{B03002_008}: not Hispanic or Latino, Some other race alone \code{'NHoLSOR'} +\item \strong{B03002_009}: not Hispanic or Latino, Two or more races \code{'NHoLTOMR'} +\item \strong{B03002_010}: not Hispanic or Latino, Two races including Some other race \code{'NHoLTRiSOR'} +\item \strong{B03002_011}: not Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'NHoLTReSOR'} +\item \strong{B03002_012}: Hispanic or Latino \code{'HoL'} +\item \strong{B03002_013}: Hispanic or Latino, white alone \code{'HoLW'} +\item \strong{B03002_014}: Hispanic or Latino, Black or African American alone \code{'HoLB'} +\item \strong{B03002_015}: Hispanic or Latino, American Indian and Alaska Native alone \code{'HoLAIAN'} +\item \strong{B03002_016}: Hispanic or Latino, Asian alone \code{'HoLA'} +\item \strong{B03002_017}: Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'HoLNHOPI'} +\item \strong{B03002_018}: Hispanic or 
Latino, Some other race alone \code{'HoLSOR'} +\item \strong{B03002_019}: Hispanic or Latino, Two or more races \code{'HoLTOMR'} +\item \strong{B03002_020}: Hispanic or Latino, Two races including Some other race \code{'HoLTRiSOR'} +\item \strong{B03002_021}: Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'HoLTReSOR'} +} + +Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. + +\emph{DPxy\*} is a measure of clustering of racial or ethnic populations within smaller geographical units that are located within larger geographical units. \emph{DPxy\*} is some measure of the probability that a member of a racial or ethnic subgroup will meet or interact with a member of another racial or ethnic subgroup(s). \emph{DPxy\*} can range in value from 0 to 1 with higher values signifying higher probability of interaction. + +The metric uses the exponential transform of a distance matrix (kilometers) between smaller geographical area centroids, with a diagonal defined as \code{(0.6*a_{i})^{0.5}} where \code{a_{i}} is the area (square kilometers) of smaller geographical unit \code{i} as defined by White (1983) \doi{10.1086/227768}. + +Larger geographical units available include states \code{geo_large = 'state'}, counties \code{geo_large = 'county'}, census tracts \code{geo_large = 'tract'}, census-designated places \code{geo_large = 'place'}, core-based statistical areas \code{geo_large = 'cbsa'}, combined statistical areas \code{geo_large = 'csa'}, and metropolitan divisions \code{geo_large = 'metro'}. Smaller geographical units available include, counties \code{geo_small = 'county'}, census tracts \code{geo_small = 'tract'}, and census block groups \code{geo_small = 'cbg'}. 
If a larger geographical unit is comprised of only one smaller geographical unit (e.g., a U.S. county contains only one census tract), then the \emph{DPxy\*} value returned is NA. If the larger geographical unit is census-designated places \code{geo_large = 'place'}, core-based statistical areas \code{geo_large = 'cbsa'}, combined statistical areas \code{geo_large = 'csa'}, or metropolitan divisions \code{geo_large = 'metro'}, only the smaller geographical units completely within a larger geographical unit are considered in the \emph{DPxy\*} computation (see internal \code{\link[sf]{st_within}} function for more information) and recommend specifying all states within which the interested larger geographical unit are located using the internal \code{state} argument to ensure all appropriate smaller geographical units are included in the \emph{DPxy\*} computation. +} +\examples{ +\dontrun{ +# Wrapped in \dontrun{} because these examples require a Census API key. + + # Distance-Decay Interaction Index (a measure of clustering) + ## of non-Hispanic Black vs. non-Hispanic white populations + ## in census tracts within counties of Georgia, U.S.A. (2020) + morgan_denton( + geo_large = 'county', + geo_small = 'tract', + state = 'GA', + year = 2020, + subgroup = 'NHoLB', + subgroup_ixn = 'NHoLW' + ) + +} + +} +\seealso{ +\code{\link[tidycensus]{get_acs}} for additional arguments for geographic extent selection (i.e., \code{state} and \code{county}). +} diff --git a/man/morgan_massey.Rd b/man/morgan_massey.Rd index 91a66a4..016c3e6 100644 --- a/man/morgan_massey.Rd +++ b/man/morgan_massey.Rd @@ -73,7 +73,7 @@ The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output.
-\emph{DPxx\*} is a measure of clustering of racial or ethnic populations within smaller geographical units that are located within larger geographical units. \emph{DPxx\*} is some measure of the probability that a member of one subgroup(s) will meet or interact with a member of another subgroup(s). \emph{DPxx\*} can range in value from 0 to 1 with higher values signifying higher probability of isolation (less isolation). +\emph{DPxx\*} is a measure of clustering of racial or ethnic populations within smaller geographical units that are located within larger geographical units. \emph{DPxx\*} is some measure of the probability that a member of one racial or ethnic subgroup will meet or interact with a member of the same racial or ethnic subgroup. \emph{DPxx\*} can range in value from 0 to 1 with higher values signifying higher probability of isolation (less isolation). The metric uses the exponential transform of a distance matrix (kilometers) between smaller geographical area centroids, with a diagonal defined as \code{(0.6*a_{i})^{0.5}} where \code{a_{i}} is the area (square kilometers) of smaller geographical unit \code{i} as defined by White (1983) \doi{10.1086/227768}. diff --git a/man/ndi-package.Rd b/man/ndi-package.Rd index 75ebaa2..6ffa831 100644 --- a/man/ndi-package.Rd +++ b/man/ndi-package.Rd @@ -51,6 +51,8 @@ Key content of the 'ndi' package include:\cr \code{\link{massey_duncan}} Computes the aspatial Absolute Concentration (\emph{ACO}) based on Massey & Denton (1988) \doi{10.1093/sf/67.2.281} and Duncan, Cuzzort, & Duncan (1961; LC:60007089). +\code{\link{morgan_denton}} Computes the aspatial Distance-Decay Interaction Index (\emph{DPxy\*}) based on Morgan (1986) \url{https://www.jstor.org/stable/20001935} and Massey & Denton (1988) \doi{10.1093/sf/67.2.281}. 
+ \code{\link{morgan_massey}} Computes the aspatial Distance-Decay Isolation Index (\emph{DPxx\*}) based on Morgan (1986) \url{https://www.jstor.org/stable/20001935} and Massey & Denton (1988) \doi{10.1093/sf/67.2.281}. \code{\link{sudano}} Computes the aspatial Location Quotient (\emph{LQ}) based on Merton (1939) \doi{10.2307/2084686} and Sudano et al. (2013) \doi{10.1016/j.healthplace.2012.09.015}. diff --git a/tests/testthat/test-morgan_denton.R b/tests/testthat/test-morgan_denton.R new file mode 100644 index 0000000..9553d24 --- /dev/null +++ b/tests/testthat/test-morgan_denton.R @@ -0,0 +1,79 @@ +context('morgan_denton') + +# ---------------------- # +# morgan_denton testthat # +# ---------------------- # + +test_that('morgan_denton throws error with invalid arguments', { + # Unavailable geography + expect_error( + morgan_denton( + geo_small = 'zcta', + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + ) + ) + expect_error( + morgan_denton( + geo_large = 'block group', + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + ) + ) + + # Unavailable year + expect_error(morgan_denton( + state = 'DC', + year = 2005, + subgroup = 'NHoLB', + quiet = TRUE + )) + + # Unavailable subgroup + expect_error(morgan_denton( + state = 'DC', + year = 2020, + subgroup = 'terran', + quiet = TRUE + )) + + skip_if(Sys.getenv('CENSUS_API_KEY') == '') + + # Incorrect state + expect_error(morgan_denton( + state = 'AB', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + )) + +}) + +test_that('morgan_denton works', { + skip_if(Sys.getenv('CENSUS_API_KEY') == '') + + expect_silent(morgan_denton( + state = 'DC', + year = 2020, + subgroup = c('NHoLB', 'HoLB') + )) + + expect_silent(morgan_denton( + state = 'DC', + year = 2020, + subgroup = 'NHoLB', + quiet = TRUE + )) + + expect_silent(morgan_denton( + state = 'DC', + year = 2020, + subgroup = c('NHoLB', 'HoLB'), + quiet = TRUE + )) + +}) diff --git a/vignettes/ndi2.Rmd b/vignettes/ndi2.Rmd index 
5436f40..63da1a1 100644 --- a/vignettes/ndi2.Rmd +++ b/vignettes/ndi2.Rmd @@ -70,7 +70,7 @@ Since version v0.1.1, the [*ndi*](https://CRAN.R-project.org/package=ndi) packag * `massey()` function that computes Absolute Clustering (*ACL*) based on [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281) * `white_blau()` function that computes an index of spatial proximity (*SP*) based on [White (1986)](https://doi.org/10.2307/3644339) and Blau (1977; ISBN-13:978-0-029-03660-0) * `denton()` function that compute Relative Clustering (*RCL*) based on [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281) - * Distance Decay Interaction (Planned) + * `morgan_denton()` function that computes the Distance-Decay Interaction Index (_DPxy\*_) based on [Morgan (1983)](https://www.jstor.org/stable/20001935) and [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281) * `morgan_massey()` function that computes the Distance-Decay Isolation Index (_DPxx\*_) based on [Morgan (1983)](https://www.jstor.org/stable/20001935) and [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281) ### Racial or ethnic residential segregation indices @@ -1127,9 +1127,61 @@ ggplot() + ) ``` +#### Compute Distance-Decay Interaction Index (_DPxy\*_) + +Compute the racial or ethnic _DPxy\*_ values (2017-2021 5-year ACS) for census tracts within core-based statistical areas of Louisiana. This metric is based on [Morgan (1983)](https://www.jstor.org/stable/20001935) and [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281). _DPxy\*_ is some measure of the probability that a member of a racial or ethnic subgroup will meet or interact with a member of another racial or ethnic subgroup(s). _DPxy\*_ can range in value from 0 to 1 with higher values signifying higher probability of interaction (less isolation).
+ +```{r morgan_denton_prep, results = 'hide'} +morgan_denton2021LA <- morgan_denton( + geo_large = 'cbsa', + geo_small = 'tract', + state = 'LA', + year = 2021, + subgroup = 'NHoLB', + subgroup_ixn = 'NHoLW' +) + +# Obtain the 2021 core-based statistical areas from the 'tigris' package +cbsa2021 <- core_based_statistical_areas(year = 2021, cb = TRUE) +# Obtain the 2021 state from the 'tigris' package +states2021 <- states(year = 2021, cb = TRUE) + +# Join the DPxy* values to the core-based statistical area geometries and filter for Louisiana +LA2021morgan_denton <- cbsa2021 %>% + left_join(morgan_denton2021LA$dpxy_star, by = 'GEOID') %>% + filter(!st_is_empty(.)) %>% + filter(!is.na(DPxy_star)) %>% + st_filter(states2021 %>% filter(STUSPS == 'LA'), .predicate = st_within) %>% + st_make_valid() +``` + +```{r morgan_denton_plot, fig.height = 7, fig.width = 7} +# Visualize the DPxy* values (2017-2021 5-year ACS) for census tracts within core-based statistical areas of Louisiana +ggplot() + + geom_sf( + data = LA2021morgan_denton, + aes(fill = DPxy_star), + size = 0.05, + color = 'white' + ) + + geom_sf( + data = states2021 %>% filter(STUSPS == 'LA'), + fill = 'transparent', + color = 'black', + size = 0.2 + ) + + theme_minimal() + + scale_fill_viridis_c(limits = c(0, 1)) + + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2017-2021 estimates') + + ggtitle( + 'Distance-Decay Interaction Index (Morgan)\nCensus tracts within core-based statistical areas of Louisiana', + subtitle = 'Black non-Hispanic vs. white non-Hispanic' + ) +``` + #### Compute Distance-Decay Isolation Index (_DPxx\*_) -Compute the racial or ethnic _DPxx\*_ values (2017-2021 5-year ACS) for census tracts within census-designated placed of Louisiana. This metric is based on [Morgan (1983)](https://www.jstor.org/stable/20001935) and [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281).
_DPxx\*_ is some measure of the probability that a member of one subgroup(s) will meet or interact with a member of another subgroup(s). _DPxx\*_ can range in value from 0 to 1 with higher values signifying higher probability of isolation (less isolation). +Compute the racial or ethnic _DPxx\*_ values (2017-2021 5-year ACS) for census tracts within census-designated placed of Louisiana. This metric is based on [Morgan (1983)](https://www.jstor.org/stable/20001935) and [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281). _DPxx\*_ is some measure of the probability that a member of one racial or ethnic subgroup will meet or interact with a member of the same racial or ethnic subgroup. _DPxx\*_ can range in value from 0 to 1 with higher values signifying higher probability of isolation (less isolation). ```{r morgan_massey_prep, results = 'hide'} morgan_massey2021LA <- morgan_massey( diff --git a/vignettes/ndi2.html b/vignettes/ndi2.html index abd368d..6ab9122 100644 --- a/vignettes/ndi2.html +++ b/vignettes/ndi2.html @@ -438,7 +438,9 @@

Racial or Ethnic Residential Segregation Indices

  • denton() function that compute Relative Clustering (RCL) based on Massey & Denton (1988)
  • -
  • Distance Decay Interaction (Planned)
  • +
  • morgan_denton() function that computes the +Distance-Decay Interaction Index (DPxy*) based on Morgan (1983) and Massey & Denton +(1988)
  • morgan_massey() function that computes the Distance-Decay Isolation Index (DPxx*) based on Morgan (1983) and Massey & Denton (1988)
  • @@ -1765,40 +1767,42 @@

    Compute Relative Clustering (RCL)

    )

    -
    -

    Compute Distance-Decay Isolation Index (DPxx*)

    -

    Compute the racial or ethnic DPxx* values (2017-2021 5-year +

    +

    Compute Distance-Decay Interaction Index (DPxy*)

    +

    Compute the racial or ethnic DPxy* values (2017-2021 5-year ACS) for census tracts within census-designated placed of Louisiana. This metric is based on Morgan (1983) and Massey & Denton -(1988). DPxx* is some measure of the probability that a -member of one subgroup(s) will meet or interact with a member of another -subgroup(s). DPxx* can range in value from 0 to 1 with higher -values signifying higher probability of isolation (less isolation).

    -
    morgan_massey2021LA <- morgan_massey(
    +(1988). DPxy* is some measure of the probability that a
    +member of a racial or ethnic subgroup will meet or interact with a
    +member of another racial or ethnic subgroup(s). DPxy* can range
    +in value from 0 to 1 with higher values signifying higher probability of
    +interaction (less isolation).

    +
    morgan_denton2021LA <- morgan_denton(
       geo_large = 'cbsa',
       geo_small = 'tract',
       state = 'LA',
       year = 2021,
    -  subgroup = c('NHoLB', 'HoLB')
    -)
    -
    -# Obtain the 2021 core-based statistical areas from the 'tigris' package
    -cbsa2021 <- core_based_statistical_areas(year = 2021, cb = TRUE)
    -# Obtain the 2021 state from the 'tigris' package
    -states2021 <- states(year = 2021, cb = TRUE)
    -
    -# Join the DPxx* values to the core-based statistical area geometries and filter for Louisiana
    -LA2021morgan_massey <- cbsa2021 %>%
    -  left_join(morgan_massey2021LA$dpxx_star, by = 'GEOID') %>%
    -  filter(!st_is_empty(.)) %>%
    -  filter(!is.na(DPxx_star)) %>%
    -  st_filter(states2021 %>% filter(STUSPS == 'LA'), .predicate = st_within) %>%
    -  st_make_valid()
    + subgroup = 'NHoLB', + subgroup_ixn = 'NHoLW' +) + +# Obtain the 2021 core-based statistical areas from the 'tigris' package +cbsa2021 <- core_based_statistical_areas(year = 2021, cb = TRUE) +# Obtain the 2021 state from the 'tigris' package +states2021 <- states(year = 2021, cb = TRUE) + +# Join the DPxx* values to the core-based statistical area geometries and filter for Louisiana +LA2021morgan_denton <- cbsa2021 %>% + left_join(morgan_denton2021LA$dpxy_star, by = 'GEOID') %>% + filter(!st_is_empty(.)) %>% + filter(!is.na(DPxy_star)) %>% + st_filter(states2021 %>% filter(STUSPS == 'LA'), .predicate = st_within) %>% + st_make_valid()
    # Visualize the DPxx* values (2017-2021 5-year ACS) for census tracts within core-based statistical areas of Louisiana
     ggplot() +
       geom_sf(
    -    data = LA2021morgan_massey,
    -    aes(fill = DPxx_star),
    +    data = LA2021morgan_denton,
    +    aes(fill = DPxy_star),
         size = 0.05,
         color = 'white'
       ) +
    @@ -1812,11 +1816,64 @@ 

    Compute Distance-Decay Isolation Index (DPxx*)

    scale_fill_viridis_c(limits = c(0, 1)) + labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2017-2021 estimates') + ggtitle( - 'Distance-Decay Isolation Index (Morgan)\nCensus tracts within core-based statistical areas of Louisiana', - subtitle = 'Black population' + 'Distance-Decay Interaction Index (Morgan)\nCensus tracts within core-based statistical areas of Louisiana', + subtitle = 'Black non-Hispanic vs. white non-Hispanic' )
    +

    +
    +
    +

    Compute Distance-Decay Isolation Index (DPxx*)

    +

    Compute the racial or ethnic DPxx* values (2017-2021 5-year +ACS) for census tracts within census-designated placed of Louisiana. +This metric is based on Morgan (1983) and Massey & Denton +(1988). DPxx* is some measure of the probability that a +member of one racial or ethnic subgroup will meet or interact with a +member of the same racial or ethnic subgroup. DPxx* can range +in value from 0 to 1 with higher values signifying higher probability of +isolation (less isolation).

    +
    morgan_massey2021LA <- morgan_massey(
    +  geo_large = 'cbsa',
    +  geo_small = 'tract',
    +  state = 'LA',
    +  year = 2021,
    +  subgroup = c('NHoLB', 'HoLB')
    +)
    +
    +# Obtain the 2021 core-based statistical areas from the 'tigris' package
    +cbsa2021 <- core_based_statistical_areas(year = 2021, cb = TRUE)
    +# Obtain the 2021 state from the 'tigris' package
    +states2021 <- states(year = 2021, cb = TRUE)
    +
    +# Join the DPxx* values to the core-based statistical area geometries and filter for Louisiana
    +LA2021morgan_massey <- cbsa2021 %>%
    +  left_join(morgan_massey2021LA$dpxx_star, by = 'GEOID') %>%
    +  filter(!st_is_empty(.)) %>%
    +  filter(!is.na(DPxx_star)) %>%
    +  st_filter(states2021 %>% filter(STUSPS == 'LA'), .predicate = st_within) %>%
    +  st_make_valid()
    +
    # Visualize the DPxx* values (2017-2021 5-year ACS) for census tracts within core-based statistical areas of Louisiana
    +ggplot() +
    +  geom_sf(
    +    data = LA2021morgan_massey,
    +    aes(fill = DPxx_star),
    +    size = 0.05,
    +    color = 'white'
    +  ) +
    +  geom_sf(
    +    data = states2021 %>% filter(STUSPS == 'LA'),
    +    fill = 'transparent',
    +    color = 'black',
    +    size = 0.2
    +  ) +
    +  theme_minimal() +
    +  scale_fill_viridis_c(limits = c(0, 1)) +
    +  labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2017-2021 estimates') +
    +  ggtitle(
    +    'Distance-Decay Isolation Index (Morgan)\nCensus tracts within core-based statistical areas of Louisiana',
    +    subtitle = 'Black population'
    +  )

    -
    sessionInfo()
    +
    sessionInfo()
    ## R version 4.4.1 (2024-06-14 ucrt)
     ## Platform: x86_64-w64-mingw32/x64
     ## Running under: Windows 10 x64 (build 19045)
    @@ -1838,7 +1895,7 @@ 

    Compute Distance-Decay Isolation Index (DPxx*)

    ## [1] stats graphics grDevices utils datasets methods base ## ## other attached packages: -## [1] tigris_2.1 tidycensus_1.6.5 sf_1.0-16 ndi_0.1.6.9013 +## [1] tigris_2.1 tidycensus_1.6.5 sf_1.0-16 ndi_0.1.6.9014 ## [5] ggplot2_3.5.1 dplyr_1.1.4 knitr_1.48 ## ## loaded via a namespace (and not attached):