diff --git a/DESCRIPTION b/DESCRIPTION index a46fad0..6b722fd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ndi Title: Neighborhood Deprivation Indices -Version: 0.1.6.9013 +Version: 0.1.6.9014 Date: 2024-09-02 Authors@R: c(person(given = "Ian D.", diff --git a/NAMESPACE b/NAMESPACE index 9db4c2c..fca523d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,6 +18,7 @@ export(lieberson) export(massey) export(massey_duncan) export(messer) +export(morgan_denton) export(morgan_massey) export(powell_wiley) export(sudano) diff --git a/NEWS.md b/NEWS.md index d94eefd..1eb054d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,6 @@ # ndi (development version) -## ndi v0.1.6.9013 +## ndi v0.1.6.9014 ### New Features @@ -17,10 +17,11 @@ * Added `duncan_duncan()` function to compute the aspatial racial or ethnic Relative Centralization (*RCE*) based on [Duncan & Duncan (1955b)](https://doi.org/10.1086/221609) and [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281) * Added `massey()` function to compute the aspatial racial or ethnic Absolute Clustering (*ACL*) based on [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281) * Added `massey_duncan()` function to compute the aspatial racial or ethnic Absolute Concentration (*ACO*) based on [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281) and Duncan, Cuzzort, & Duncan (1961; LC:60007089) +* Added `morgan_denton()` function to compute the aspatial racial or ethnic Distance-Decay Interaction Index (_DPxy\*_) based on [Morgan (1983)](https://www.jstor.org/stable/20001935) and [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281) * Added `morgan_massey()` function to compute the aspatial racial or ethnic Distance-Decay Isolation Index (_DPxx\*_) based on [Morgan (1983)](https://www.jstor.org/stable/20001935) and [Massey & Denton (1988)](https://doi.org/10.1093/sf/67.2.281) #### New Function Capabilities -* Added `geo_large = 'place'` for census-designated places, `geo_large = 'cbsa'` for 
core-based statistical areas, `geo_large = 'csa'` for combined statistical areas, and `geo_large = 'metro'` for metropolitan divisions as the larger geographical unit in `atkinson()`, `bell()`, `bemanian_beyer()`, `denton()`, `denton_cuzzort()`, `duncan()`, `duncan_cuzzort()`, `duncan_duncan()`, `hoover()`, `james_taeuber()`, `lieberson()`, `massey()`, `massey_duncan()`, `morgan_massey()`, `sudano()`, `theil()`, and `white()`, `white_blau()` functions. +* Added `geo_large = 'place'` for census-designated places, `geo_large = 'cbsa'` for core-based statistical areas, `geo_large = 'csa'` for combined statistical areas, and `geo_large = 'metro'` for metropolitan divisions as the larger geographical unit in `atkinson()`, `bell()`, `bemanian_beyer()`, `denton()`, `denton_cuzzort()`, `duncan()`, `duncan_cuzzort()`, `duncan_duncan()`, `hoover()`, `james_taeuber()`, `lieberson()`, `massey()`, `massey_duncan()`, `morgan_denton()`, `morgan_massey()`, `sudano()`, `theil()`, `white()`, and `white_blau()` functions. * Added census block group computation for `anthopolos()` by specifying `geo == 'cbg'` or `geo == 'block group'` * Added `holder` argument to `atkinson()` function to toggle the computation with or without the Hölder mean. The function can now compute *A* without the Hölder mean. The default is `holder = FALSE`. 
* Added `crs` argument to `anthopolos()`, `bravo()`, and `white_blau()` functions to provide spatial projection of the distance-based metrics diff --git a/R/globals.R b/R/globals.R index 07df090..7835876 100644 --- a/R/globals.R +++ b/R/globals.R @@ -283,6 +283,7 @@ globalVariables( 'n_1', 'n_2', 't_cs', - 'DPxx_star' + 'DPxx_star', + 'DPxy_star' ) ) diff --git a/R/morgan_denton.R b/R/morgan_denton.R new file mode 100644 index 0000000..7010de6 --- /dev/null +++ b/R/morgan_denton.R @@ -0,0 +1,450 @@ +#' Distance-Decay Interaction Index based on Morgan (1983) and Massey & Denton (1988) +#' +#' Compute the aspatial Distance-Decay Interaction Index (Morgan) of a selected racial or ethnic subgroup(s) and U.S. geographies. +#' +#' @param geo_large Character string specifying the larger geographical unit of the data. The default is counties \code{geo_large = 'county'}. +#' @param geo_small Character string specifying the smaller geographical unit of the data. The default is census tracts \code{geo_small = 'tract'}. +#' @param year Numeric. The year to compute the estimate. The default is 2020, and the years 2009 onward are currently available. +#' @param subgroup Character string specifying the racial or ethnic subgroup(s) as the comparison population. See Details for available choices. +#' @param subgroup_ixn Character string specifying the racial or ethnic subgroup(s) as the interaction population. If the same as \code{subgroup}, will compute the simple isolation of the group. See Details for available choices. +#' @param crs Numeric or character string specifying the coordinate reference system to compute the distance-based metric. The default is Albers North America \code{crs = 'ESRI:102008'}. +#' @param omit_NAs Logical. If FALSE, will compute index for a larger geographical unit only if all of its smaller geographical units have values. The default is TRUE. +#' @param quiet Logical. If TRUE, will suppress messages about potential missing census information. 
The default is FALSE. +#' @param ... Arguments passed to \code{\link[tidycensus]{get_acs}} to select state, county, and other arguments for census characteristics +#' +#' @details This function will compute the aspatial Distance-Decay Interaction Index (_DPxy\*_) of selected racial or ethnic subgroups and U.S. geographies for a specified geographical extent (e.g., the entire U.S. or a single state) based on Morgan (1983) \url{https://www.jstor.org/stable/20001935} and Massey & Denton (1988) \doi{10.1093/sf/67.2.281}. This function provides the computation of _DPxy\*_ for any of the U.S. Census Bureau race or ethnicity subgroups (including Hispanic and non-Hispanic individuals). +#' +#' The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey characteristics used for the computation. The yearly estimates are available for 2009 onward when ACS-5 data are available (2010 onward for \code{geo_large = 'cbsa'} and 2011 onward for \code{geo_large = 'place'}, \code{geo_large = 'csa'}, or \code{geo_large = 'metro'}) but may be available from other U.S. Census Bureau surveys. The twenty racial or ethnic subgroups (U.S. 
Census Bureau definitions) are: +#' \itemize{ +#' \item \strong{B03002_002}: not Hispanic or Latino \code{'NHoL'} +#' \item \strong{B03002_003}: not Hispanic or Latino, white alone \code{'NHoLW'} +#' \item \strong{B03002_004}: not Hispanic or Latino, Black or African American alone \code{'NHoLB'} +#' \item \strong{B03002_005}: not Hispanic or Latino, American Indian and Alaska Native alone \code{'NHoLAIAN'} +#' \item \strong{B03002_006}: not Hispanic or Latino, Asian alone \code{'NHoLA'} +#' \item \strong{B03002_007}: not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'NHoLNHOPI'} +#' \item \strong{B03002_008}: not Hispanic or Latino, Some other race alone \code{'NHoLSOR'} +#' \item \strong{B03002_009}: not Hispanic or Latino, Two or more races \code{'NHoLTOMR'} +#' \item \strong{B03002_010}: not Hispanic or Latino, Two races including Some other race \code{'NHoLTRiSOR'} +#' \item \strong{B03002_011}: not Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'NHoLTReSOR'} +#' \item \strong{B03002_012}: Hispanic or Latino \code{'HoL'} +#' \item \strong{B03002_013}: Hispanic or Latino, white alone \code{'HoLW'} +#' \item \strong{B03002_014}: Hispanic or Latino, Black or African American alone \code{'HoLB'} +#' \item \strong{B03002_015}: Hispanic or Latino, American Indian and Alaska Native alone \code{'HoLAIAN'} +#' \item \strong{B03002_016}: Hispanic or Latino, Asian alone \code{'HoLA'} +#' \item \strong{B03002_017}: Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone \code{'HoLNHOPI'} +#' \item \strong{B03002_018}: Hispanic or Latino, Some other race alone \code{'HoLSOR'} +#' \item \strong{B03002_019}: Hispanic or Latino, Two or more races \code{'HoLTOMR'} +#' \item \strong{B03002_020}: Hispanic or Latino, Two races including Some other race \code{'HoLTRiSOR'} +#' \item \strong{B03002_021}: Hispanic or Latino, Two races excluding Some other race, and three or more races \code{'HoLTReSOR'} 
+#' } +#' +#' Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output. +#' +#' _DPxy\*_ is a measure of clustering of racial or ethnic populations within smaller geographical units that are located within larger geographical units. _DPxy\*_ is some measure of the probability that a member of a racial or ethnic subgroup will meet or interact with a member of another racial or ethnic subgroup(s). _DPxy\*_ can range in value from 0 to 1 with higher values signifying higher probability of interaction. +#' +#' The metric uses the exponential transform of a distance matrix (kilometers) between smaller geographical area centroids, with a diagonal defined as \code{(0.6*a_{i})^{0.5}} where \code{a_{i}} is the area (square kilometers) of smaller geographical unit \code{i} as defined by White (1983) \doi{10.1086/227768}. +#' +#' Larger geographical units available include states \code{geo_large = 'state'}, counties \code{geo_large = 'county'}, census tracts \code{geo_large = 'tract'}, census-designated places \code{geo_large = 'place'}, core-based statistical areas \code{geo_large = 'cbsa'}, combined statistical areas \code{geo_large = 'csa'}, and metropolitan divisions \code{geo_large = 'metro'}. Smaller geographical units available include, counties \code{geo_small = 'county'}, census tracts \code{geo_small = 'tract'}, and census block groups \code{geo_small = 'cbg'}. If a larger geographical unit is comprised of only one smaller geographical unit (e.g., a U.S county contains only one census tract), then the _DPxy\*_ value returned is NA. 
If the larger geographical unit is census-designated places \code{geo_large = 'place'}, core-based statistical areas \code{geo_large = 'cbsa'}, combined statistical areas \code{geo_large = 'csa'}, or metropolitan divisions \code{geo_large = 'metro'}, only the smaller geographical units completely within a larger geographical unit are considered in the _DPxy\*_ computation (see internal \code{\link[sf]{st_within}} function for more information). We recommend specifying all states within which the larger geographical units of interest are located using the internal \code{state} argument to ensure all appropriate smaller geographical units are included in the _DPxy\*_ computation. +#' +#' @return An object of class 'list'. This is a named list with the following components: +#' +#' \describe{ +#' \item{\code{dpxy_star}}{An object of class 'tbl' for the GEOID, name, and _DPxy\*_ at specified larger census geographies.} +#' \item{\code{dpxy_star_data}}{An object of class 'tbl' for the raw census values at specified smaller census geographies.} +#' \item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute _DPxy\*_.} +#' } +#' +#' @import dplyr +#' @importFrom sf st_centroid st_distance st_drop_geometry st_transform st_within +#' @importFrom stats complete.cases +#' @importFrom stringr str_trim +#' @importFrom tidycensus get_acs +#' @importFrom tidyr pivot_longer separate +#' @importFrom tigris combined_statistical_areas core_based_statistical_areas metro_divisions places +#' @importFrom units drop_units set_units +#' @importFrom utils stack +#' @export +#' +#' @seealso \code{\link[tidycensus]{get_acs}} for additional arguments for geographic extent selection (i.e., \code{state} and \code{county}). +#' +#' @examples +#' \dontrun{ +#' # Wrapped in \dontrun{} because these examples require a Census API key. 
+#'
+#' # Distance-Decay Interaction Index (a measure of clustering)
+#' ## of non-Hispanic Black vs. non-Hispanic white populations
+#' ## in census tracts within counties of Georgia, U.S.A. (2020)
+#' morgan_denton(
+#'   geo_large = 'county',
+#'   geo_small = 'tract',
+#'   state = 'GA',
+#'   year = 2020,
+#'   subgroup = 'NHoLB',
+#'   subgroup_ixn = 'NHoLW'
+#' )
+#'
+#' }
+#'
+morgan_denton <- function(geo_large = 'county',
+                          geo_small = 'tract',
+                          year = 2020,
+                          subgroup,
+                          subgroup_ixn,
+                          crs = 'ESRI:102008',
+                          omit_NAs = TRUE,
+                          quiet = FALSE,
+                          ...) {
+
+  # Census variables (ACS table B03002)
+  ## Every entry except 'TotalPop' is a valid choice for subgroup and subgroup_ixn
+  vars <- c(
+    TotalPop = 'B03002_001',
+    NHoL = 'B03002_002',
+    NHoLW = 'B03002_003',
+    NHoLB = 'B03002_004',
+    NHoLAIAN = 'B03002_005',
+    NHoLA = 'B03002_006',
+    NHoLNHOPI = 'B03002_007',
+    NHoLSOR = 'B03002_008',
+    NHoLTOMR = 'B03002_009',
+    NHoLTRiSOR = 'B03002_010',
+    NHoLTReSOR = 'B03002_011',
+    HoL = 'B03002_012',
+    HoLW = 'B03002_013',
+    HoLB = 'B03002_014',
+    HoLAIAN = 'B03002_015',
+    HoLA = 'B03002_016',
+    HoLNHOPI = 'B03002_017',
+    HoLSOR = 'B03002_018',
+    HoLTOMR = 'B03002_019',
+    HoLTRiSOR = 'B03002_020',
+    HoLTReSOR = 'B03002_021'
+  )
+  subgroup_choices <- setdiff(names(vars), 'TotalPop')
+
+  # Check arguments
+  match.arg(geo_large, choices = c('state', 'county', 'tract', 'place', 'cbsa', 'csa', 'metro'))
+  match.arg(geo_small, choices = c('county', 'tract', 'cbg', 'block group'))
+  stopifnot(is.numeric(year), year >= 2009) # all variables available 2009 onward
+  match.arg(subgroup, several.ok = TRUE, choices = subgroup_choices)
+  match.arg(subgroup_ixn, several.ok = TRUE, choices = subgroup_choices)
+
+  # Select census variables
+  ## unique() because subgroup and subgroup_ixn may overlap (e.g., identical subgroups);
+  ## duplicates would otherwise request the same variable twice and produce
+  ## duplicate column names in the missingness table below
+  selected_vars <- vars[unique(c('TotalPop', subgroup, subgroup_ixn))]
+  out_names <- c(names(selected_vars), 'ALAND') # save for output
+  in_subgroup <- paste0(subgroup, 'E')
+  in_subgroup_ixn <- paste0(subgroup_ixn, 'E')
+
+  # Acquire DPxy_star variables and sf geometries
+  out_dat <- suppressMessages(suppressWarnings(
+    tidycensus::get_acs(
+      geography = geo_small,
+      year = year,
+      output = 'wide',
+      variables = selected_vars,
+      geometry = TRUE,
+      keep_geo_vars = TRUE,
+      ...
+    )
+  ))
+
+  # Format output
+  if (geo_small == 'county') {
+    out_dat <- out_dat %>%
+      tidyr::separate(NAME.y, into = c('county', 'state'), sep = ',')
+  }
+  if (geo_small == 'tract') {
+    out_dat <- out_dat %>%
+      tidyr::separate(NAME.y, into = c('tract', 'county', 'state'), sep = ',') %>%
+      dplyr::mutate(tract = gsub('[^0-9\\.]', '', tract))
+  }
+  if (geo_small == 'cbg' || geo_small == 'block group') {
+    out_dat <- out_dat %>%
+      tidyr::separate(NAME.y, into = c('cbg', 'tract', 'county', 'state'), sep = ',') %>%
+      dplyr::mutate(
+        tract = gsub('[^0-9\\.]', '', tract),
+        cbg = gsub('[^0-9\\.]', '', cbg)
+      )
+  }
+
+  # Grouping IDs for DPxy_star computation
+  ## Helper: for each smaller geography, the value in column `col` of the larger
+  ## geography it lies completely within (NA when it lies within none)
+  within_value <- function(wlgeom, lgeom, col) {
+    lapply(wlgeom, function(x) {
+      tmp <- lgeom[x, col] %>% sf::st_drop_geometry()
+      lapply(tmp, function(x) { if (length(x) == 0) NA else x })
+    }) %>%
+      unlist()
+  }
+  if (geo_large == 'state') {
+    out_dat <- out_dat %>%
+      dplyr::mutate(
+        oid = STATEFP,
+        state = stringr::str_trim(state)
+      )
+  }
+  if (geo_large == 'tract') {
+    out_dat <- out_dat %>%
+      dplyr::mutate(
+        oid = paste0(STATEFP, COUNTYFP, TRACTCE),
+        state = stringr::str_trim(state),
+        county = stringr::str_trim(county)
+      )
+  }
+  if (geo_large == 'county') {
+    out_dat <- out_dat %>%
+      dplyr::mutate(
+        oid = paste0(STATEFP, COUNTYFP),
+        state = stringr::str_trim(state),
+        county = stringr::str_trim(county)
+      )
+  }
+  if (geo_large == 'place') {
+    stopifnot(is.numeric(year), year >= 2011) # Places only available 2011 onward
+    lgeom <- suppressMessages(suppressWarnings(tigris::places(
+      year = year, state = unique(out_dat$state))
+    ))
+    wlgeom <- sf::st_within(out_dat, lgeom)
+    out_dat <- out_dat %>%
+      dplyr::mutate(
+        oid = within_value(wlgeom, lgeom, 4),
+        place = within_value(wlgeom, lgeom, 5)
+      )
+  }
+  if (geo_large == 'cbsa') {
+    stopifnot(is.numeric(year), year >= 2010) # CBSAs only available 2010 onward
+    lgeom <- suppressMessages(suppressWarnings(tigris::core_based_statistical_areas(year = year)))
+    wlgeom <- sf::st_within(out_dat, lgeom)
+    out_dat <- out_dat %>%
+      dplyr::mutate(
+        oid = within_value(wlgeom, lgeom, 3),
+        cbsa = within_value(wlgeom, lgeom, 4)
+      )
+  }
+  if (geo_large == 'csa') {
+    stopifnot(is.numeric(year), year >= 2011) # CSAs only available 2011 onward
+    lgeom <- suppressMessages(suppressWarnings(tigris::combined_statistical_areas(year = year)))
+    wlgeom <- sf::st_within(out_dat, lgeom)
+    out_dat <- out_dat %>%
+      dplyr::mutate(
+        oid = within_value(wlgeom, lgeom, 2),
+        csa = within_value(wlgeom, lgeom, 3)
+      )
+  }
+  if (geo_large == 'metro') {
+    stopifnot(is.numeric(year), year >= 2011) # Metropolitan Divisions only available 2011 onward
+    lgeom <- suppressMessages(suppressWarnings(tigris::metro_divisions(year = year)))
+    wlgeom <- sf::st_within(out_dat, lgeom)
+    out_dat <- out_dat %>%
+      dplyr::mutate(
+        oid = within_value(wlgeom, lgeom, 4),
+        metro = within_value(wlgeom, lgeom, 5)
+      )
+  }
+
+  # Count of racial or ethnic subgroup populations
+  ## Count of racial or ethnic comparison subgroup population
+  if (length(in_subgroup) == 1) {
+    out_dat <- out_dat %>%
+      dplyr::mutate(subgroup = as.data.frame(.)[, in_subgroup])
+  } else {
+    out_dat <- out_dat %>%
+      dplyr::mutate(subgroup = rowSums(as.data.frame(.)[, in_subgroup]))
+  }
+  ## Count of racial or ethnic interaction subgroup population
+  if (length(in_subgroup_ixn) == 1) {
+    out_dat <- out_dat %>%
+      dplyr::mutate(subgroup_ixn = as.data.frame(.)[, in_subgroup_ixn])
+  } else {
+    out_dat <- out_dat %>%
+      dplyr::mutate(subgroup_ixn = rowSums(as.data.frame(.)[, in_subgroup_ixn]))
+  }
+
+  # Compute DPxy*
+  ## From Massey & Denton (1988) https://doi.org/10.1093/sf/67.2.281
+  ## DP_{xy}^{*}=\sum_{i=1}^{n}\frac{x_{i}}{X}\sum_{j=1}^{n}\frac{K_{ij}y_{j}}{t_{j}}
+  ## Where for i & j smaller geographical units:
+  ## x_{i} denotes the racial or ethnic subgroup population of smaller geographical unit i
+  ## X denotes the racial or ethnic subgroup population of a larger geographical unit
+  ## y_{j} denotes the interaction racial or ethnic subgroup population of smaller geographical unit j
+  ## t_{j} denotes the total population of smaller geographical unit j
+  ## and
+  ## K_{ij} = \frac{exp(-d_{ij})t_{j}}{\sum_{j=1}^{n}exp(-d_{ij})t_{j}}
+
+  ## Compute
+  out_tmp <- out_dat %>%
+    .[.$oid != 'NANA', ] %>%
+    split(., f = list(.$oid)) %>%
+    lapply(., FUN = dpxy_star_fun, crs = crs, omit_NAs = omit_NAs) %>%
+    utils::stack(.) %>%
+    dplyr::mutate(
+      DPxy_star = values,
+      oid = ind
+    ) %>%
+    dplyr::select(DPxy_star, oid) %>%
+    sf::st_drop_geometry()
+
+  # Warning for missingness of census characteristics
+  ## Same de-duplicated column order as out_names
+  missing_cols <- c('TotalPopE', unique(c(in_subgroup, in_subgroup_ixn)), 'ALAND')
+  missingYN <- out_dat[, missing_cols] %>%
+    sf::st_drop_geometry()
+  names(missingYN) <- out_names
+  missingYN <- missingYN %>%
+    tidyr::pivot_longer(
+      cols = dplyr::everything(),
+      names_to = 'variable',
+      values_to = 'val'
+    ) %>%
+    dplyr::group_by(variable) %>%
+    dplyr::summarise(
+      total = dplyr::n(),
+      n_missing = sum(is.na(val)),
+      percent_missing = paste0(round(mean(is.na(val)) * 100, 2), ' %')
+    )
+
+  if (!quiet) {
+    # Warning for missing census data
+    if (sum(missingYN$n_missing) > 0) {
+      message('Warning: Missing census data')
+    }
+  }
+
+  # Format output
+  ## Name column(s) reported alongside the GEOID of each larger geography
+  id_cols <- switch(
+    geo_large,
+    state = 'state',
+    county = c('state', 'county'),
+    tract = c('state', 'county', 'tract'),
+    place = 'place',
+    cbsa = 'cbsa',
+    csa = 'csa',
+    metro = 'metro'
+  )
+  out <- out_dat %>%
+    sf::st_drop_geometry() %>%
+    dplyr::left_join(out_tmp, by = dplyr::join_by(oid)) %>%
+    dplyr::select(oid, dplyr::all_of(id_cols), DPxy_star) %>%
+    unique(.) %>%
+    dplyr::mutate(GEOID = oid) %>%
+    dplyr::select(GEOID, dplyr::all_of(id_cols), DPxy_star)
+
+  out <- out %>%
+    .[.$GEOID != 'NANA', ] %>%
+    dplyr::filter(!is.na(GEOID)) %>%
+    dplyr::distinct(GEOID, .keep_all = TRUE) %>%
+    dplyr::arrange(GEOID) %>%
+    dplyr::as_tibble()
+
+  out_dat <- out_dat %>%
+    dplyr::arrange(GEOID) %>%
+    dplyr::as_tibble()
+
+  out <- list(dpxy_star = out, dpxy_star_data = out_dat, missing = missingYN)
+
+  return(out)
+}
diff --git a/R/morgan_massey.R b/R/morgan_massey.R
index d588a20..8066948 100644
--- a/R/morgan_massey.R
+++ b/R/morgan_massey.R
@@ -39,7 +39,7 @@
 #'
 #' Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify geographic extent of the data output.
 #'
-#' _DPxx\*_ is a measure of clustering of racial or ethnic populations within smaller geographical units that are located within larger geographical units. _DPxx\*_ is some measure of the probability that a member of one subgroup(s) will meet or interact with a member of another subgroup(s). _DPxx\*_ can range in value from 0 to 1 with higher values signifying higher probability of isolation (less isolation).
+#' _DPxx\*_ is a measure of clustering of racial or ethnic populations within smaller geographical units that are located within larger geographical units. _DPxx\*_ is some measure of the probability that a member of one racial or ethnic subgroup will meet or interact with a member of the same racial or ethnic subgroup. _DPxx\*_ can range in value from 0 to 1 with higher values signifying higher probability of isolation (less isolation).
#' #' The metric uses the exponential transform of a distance matrix (kilometers) between smaller geographical area centroids, with a diagonal defined as \code{(0.6*a_{i})^{0.5}} where \code{a_{i}} is the area (square kilometers) of smaller geographical unit \code{i} as defined by White (1983) \doi{10.1086/227768}. #' @@ -293,7 +293,7 @@ morgan_massey <- function(geo_large = 'county', dplyr::mutate(subgroup = rowSums(as.data.frame(.)[, in_subgroup])) } - # Compute DPxx_star + # Compute DPxx* ## From Massey & Denton (1988) https://doi.org/10.1093/sf/67.2.281 ## DP_{xx}^{*}=\sum_{i=1}^{n}\frac{x_{i}}{X}\sum_{j=1}^{n}\frac{K_{ij}x_{j}}{t_{j}} ## Where for i & j smaller geographical units: diff --git a/R/ndi-package.R b/R/ndi-package.R index e7e45ef..168e207 100644 --- a/R/ndi-package.R +++ b/R/ndi-package.R @@ -44,6 +44,8 @@ #' #' \code{\link{massey_duncan}} Computes the aspatial Absolute Concentration (\emph{ACO}) based on Massey & Denton (1988) \doi{10.1093/sf/67.2.281} and Duncan, Cuzzort, & Duncan (1961; LC:60007089). #' +#' \code{\link{morgan_denton}} Computes the aspatial Distance-Decay Interaction Index (\emph{DPxy\*}) based on Morgan (1983) \url{https://www.jstor.org/stable/20001935} and Massey & Denton (1988) \doi{10.1093/sf/67.2.281}. +#' #' \code{\link{morgan_massey}} Computes the aspatial Distance-Decay Isolation Index (\emph{DPxx\*}) based on Morgan (1986) \url{https://www.jstor.org/stable/20001935} and Massey & Denton (1988) \doi{10.1093/sf/67.2.281}. #' #' \code{\link{sudano}} Computes the aspatial Location Quotient (\emph{LQ}) based on Merton (1939) \doi{10.2307/2084686} and Sudano et al. (2013) \doi{10.1016/j.healthplace.2012.09.015}. 
diff --git a/R/utils.R b/R/utils.R
index 21c8770..8313efa 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -221,7 +221,49 @@ djt_fun <- function(x, omit_NAs) {
   }
 }
 
-# Internal function for Distance Decay Isolation
+# Internal function for Distance-Decay Interaction Index
+## From Massey & Denton (1988) https://doi.org/10.1093/sf/67.2.281
+## DP_{xy}^{*}=\sum_{i=1}^{n}\frac{x_{i}}{X}\sum_{j=1}^{n}\frac{K_{ij}y_{j}}{t_{j}}
+## where K_{ij}=\frac{exp(-d_{ij})t_{j}}{\sum_{j=1}^{n}exp(-d_{ij})t_{j}}
+## x: sf object of the smaller geographies of one larger geography with columns
+##    TotalPopE, subgroup, subgroup_ixn, and ALAND
+## crs: coordinate reference system passed to sf::st_transform() before distances are computed
+## omit_NAs: if TRUE, smaller geographies with missing values are dropped before computation
+## Returns NA value if only one smaller geography with population in a larger geography,
+## if any value is negative or missing, or if the comparison subgroup has no population
+dpxy_star_fun <- function(x, crs, omit_NAs) {
+  xx <- x %>%
+    dplyr::select(TotalPopE, subgroup, subgroup_ixn, ALAND) %>%
+    dplyr::filter(TotalPopE > 0)
+  if (omit_NAs == TRUE) { xx <- xx[stats::complete.cases(sf::st_drop_geometry(xx)), ] }
+  xx_df <- sf::st_drop_geometry(xx)
+  if (nrow(xx_df) < 2 || any(xx_df < 0) || any(is.na(xx_df))) {
+    return(NA)
+  }
+  xx <- xx %>% sf::st_transform(crs = crs)
+  x_i <- xx$subgroup
+  X <- sum(x_i, na.rm = TRUE)
+  # Index is undefined without any members of the comparison subgroup
+  if (X == 0) { return(NA) }
+  y_j <- xx$subgroup_ixn
+  t_j <- xx$TotalPopE
+  centroids <- suppressWarnings(sf::st_centroid(xx))
+  d_ij <- suppressWarnings(sf::st_distance(centroids, centroids))
+  # Within-unit distance on the diagonal: (0.6 * a_i)^0.5
+  diag(d_ij) <- sqrt(0.6 * xx$ALAND)
+  c_ij <- -d_ij %>%
+    units::set_units(value = km) %>%
+    units::drop_units() %>%
+    exp()
+  # Multiply column j by t_j; a bare `c_ij * t_j` recycles t down the rows (i.e., t_i)
+  w_ij <- sweep(c_ij, 2, t_j, `*`)
+  # Normalize each row i so the contact weights over destinations j sum to 1
+  K_ij <- w_ij / rowSums(w_ij)
+  # Inner sum over j for every i, then weight by the share of the comparison subgroup in i
+  DPxy_star <- sum((x_i / X) * as.vector(K_ij %*% (y_j / t_j)))
+  return(DPxy_star)
+}
+
+# Internal function for Distance-Decay Isolation Index
 ## From Massey & Denton (1988) https://doi.org/10.1093/sf/67.2.281
 ## Returns NA value if only one smaller geography with population in a larger geography
 dpxx_star_fun <- function(x, crs, omit_NAs) {
diff --git a/README.md b/README.md
index f4a9bd4..979f8bd 100644
--- a/README.md
+++ b/README.md
@@ -115,6 +115,10 @@ To install the development version from GitHub:
morgan_denton
morgan_massey
denton()
function that computes Relative Clustering
(RCL) based on Massey & Denton
(1988)morgan_denton()
function that computes the
+Distance-Decay Interaction Index (DPxy*) based on Morgan (1983) and Massey & Denton
+(1988)morgan_massey()
function that computes the
Distance-Decay Isolation Index (DPxx*) based on Morgan (1983) and Massey & Denton
(1988)Compute the racial or ethnic DPxx* values (2017-2021 5-year +
Compute the racial or ethnic DPxy* values (2017-2021 5-year ACS) for census tracts within core-based statistical areas of Louisiana. This metric is based on Morgan (1983) and Massey & Denton -(1988). DPxx* is some measure of the probability that a -member of one subgroup(s) will meet or interact with a member of another -subgroup(s). DPxx* can range in value from 0 to 1 with higher -values signifying higher probability of isolation (less isolation).
-morgan_massey2021LA <- morgan_massey(
+(1988). DPxy* is some measure of the probability that a
+member of a racial or ethnic subgroup will meet or interact with a
+member of another racial or ethnic subgroup(s). DPxy* can range
+in value from 0 to 1 with higher values signifying higher probability of
+interaction.
+morgan_denton2021LA <- morgan_denton(
geo_large = 'cbsa',
geo_small = 'tract',
state = 'LA',
year = 2021,
- subgroup = c('NHoLB', 'HoLB')
-)
-
-# Obtain the 2021 core-based statistical areas from the 'tigris' package
-cbsa2021 <- core_based_statistical_areas(year = 2021, cb = TRUE)
-# Obtain the 2021 state from the 'tigris' package
-states2021 <- states(year = 2021, cb = TRUE)
-
-# Join the DPxx* values to the core-based statistical area geometries and filter for Louisiana
-LA2021morgan_massey <- cbsa2021 %>%
- left_join(morgan_massey2021LA$dpxx_star, by = 'GEOID') %>%
- filter(!st_is_empty(.)) %>%
- filter(!is.na(DPxx_star)) %>%
- st_filter(states2021 %>% filter(STUSPS == 'LA'), .predicate = st_within) %>%
- st_make_valid()
+ subgroup = 'NHoLB',
+ subgroup_ixn = 'NHoLW'
+)
+
+# Obtain the 2021 core-based statistical areas from the 'tigris' package
+cbsa2021 <- core_based_statistical_areas(year = 2021, cb = TRUE)
+# Obtain the 2021 state from the 'tigris' package
+states2021 <- states(year = 2021, cb = TRUE)
+
+# Join the DPxy* values to the core-based statistical area geometries and filter for Louisiana
+LA2021morgan_denton <- cbsa2021 %>%
+ left_join(morgan_denton2021LA$dpxy_star, by = 'GEOID') %>%
+ filter(!st_is_empty(.)) %>%
+ filter(!is.na(DPxy_star)) %>%
+ st_filter(states2021 %>% filter(STUSPS == 'LA'), .predicate = st_within) %>%
+ st_make_valid()
# Visualize the DPxy* values (2017-2021 5-year ACS) for census tracts within core-based statistical areas of Louisiana
ggplot() +
geom_sf(
- data = LA2021morgan_massey,
- aes(fill = DPxx_star),
+ data = LA2021morgan_denton,
+ aes(fill = DPxy_star),
size = 0.05,
color = 'white'
) +
@@ -1812,11 +1816,64 @@ Compute Distance-Decay Isolation Index (DPxx*)
scale_fill_viridis_c(limits = c(0, 1)) +
labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2017-2021 estimates') +
ggtitle(
- 'Distance-Decay Isolation Index (Morgan)\nCensus tracts within core-based statistical areas of Louisiana',
- subtitle = 'Black population'
+ 'Distance-Decay Interaction Index (Morgan)\nCensus tracts within core-based statistical areas of Louisiana',
+ subtitle = 'Black non-Hispanic vs. white non-Hispanic'
)
Compute the racial or ethnic DPxx* values (2017-2021 5-year +ACS) for census tracts within core-based statistical areas of Louisiana. +This metric is based on Morgan (1983) and Massey & Denton +(1988). DPxx* is some measure of the probability that a +member of one racial or ethnic subgroup will meet or interact with a +member of the same racial or ethnic subgroup. DPxx* can range +in value from 0 to 1 with higher values signifying higher probability of +isolation (less isolation).
+morgan_massey2021LA <- morgan_massey(
+ geo_large = 'cbsa',
+ geo_small = 'tract',
+ state = 'LA',
+ year = 2021,
+ subgroup = c('NHoLB', 'HoLB')
+)
+
+# Obtain the 2021 core-based statistical areas from the 'tigris' package
+cbsa2021 <- core_based_statistical_areas(year = 2021, cb = TRUE)
+# Obtain the 2021 state from the 'tigris' package
+states2021 <- states(year = 2021, cb = TRUE)
+
+# Join the DPxx* values to the core-based statistical area geometries and filter for Louisiana
+LA2021morgan_massey <- cbsa2021 %>%
+ left_join(morgan_massey2021LA$dpxx_star, by = 'GEOID') %>%
+ filter(!st_is_empty(.)) %>%
+ filter(!is.na(DPxx_star)) %>%
+ st_filter(states2021 %>% filter(STUSPS == 'LA'), .predicate = st_within) %>%
+ st_make_valid()
# Visualize the DPxx* values (2017-2021 5-year ACS) for census tracts within core-based statistical areas of Louisiana
+ggplot() +
+ geom_sf(
+ data = LA2021morgan_massey,
+ aes(fill = DPxx_star),
+ size = 0.05,
+ color = 'white'
+ ) +
+ geom_sf(
+ data = states2021 %>% filter(STUSPS == 'LA'),
+ fill = 'transparent',
+ color = 'black',
+ size = 0.2
+ ) +
+ theme_minimal() +
+ scale_fill_viridis_c(limits = c(0, 1)) +
+ labs(fill = 'Index (Continuous)', caption = 'Source: U.S. Census ACS 2017-2021 estimates') +
+ ggtitle(
+ 'Distance-Decay Isolation Index (Morgan)\nCensus tracts within core-based statistical areas of Louisiana',
+ subtitle = 'Black population'
+ )
## R version 4.4.1 (2024-06-14 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 10 x64 (build 19045)
@@ -1838,7 +1895,7 @@ Compute Distance-Decay Isolation Index (DPxx*)
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
-## [1] tigris_2.1 tidycensus_1.6.5 sf_1.0-16 ndi_0.1.6.9013
+## [1] tigris_2.1 tidycensus_1.6.5 sf_1.0-16 ndi_0.1.6.9014
## [5] ggplot2_3.5.1 dplyr_1.1.4 knitr_1.48
##
## loaded via a namespace (and not attached):