diff --git a/DESCRIPTION b/DESCRIPTION index fafe1bb..23b9b0e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -35,9 +35,8 @@ Imports: multidplyr, readr, hwsdr, - raster, + terra, stringr, - sp, ncdf4, signal, sf, diff --git a/R/helper_functions.R b/R/helper_functions.R index c7c5c97..7f86572 100644 --- a/R/helper_functions.R +++ b/R/helper_functions.R @@ -89,4 +89,6 @@ findna_tail <- function( vec ){ } -na.omit.list <- function(y) { return(y[!sapply(y, function(x) all(is.na(x)))]) } \ No newline at end of file +na.omit.list <- function(y){ + return(y[!sapply(y, function(x) all(is.na(x)))]) + } \ No newline at end of file diff --git a/R/ingest.R b/R/ingest.R index de00494..6a93377 100644 --- a/R/ingest.R +++ b/R/ingest.R @@ -76,11 +76,13 @@ ingest <- function( if (!(source %in% c( "hwsd", "etopo1", + "stocker23", "wwf", "soilgrids", "wise", "gsde", - "worldclim")) + "worldclim" + )) ) { # complement dates information @@ -842,14 +844,28 @@ ingest <- function( # Get ETOPO1 elevation data. year_start and year_end not required - ddf <- ingest_globalfields(siteinfo, - source = source, - dir = dir, - getvars = NULL, - timescale = NULL, - verbose = FALSE + ddf <- ingest_globalfields( + siteinfo, + source = source, + dir = dir, + getvars = NULL, + timescale = NULL, + verbose = FALSE ) - + + } else if (source == "stocker23"){ + + # Get root zone water storage capacity data. year_start and year_end not required + + ddf <- ingest_globalfields( + siteinfo, + source = source, + dir = dir, + getvars = NULL, + timescale = NULL, + verbose = FALSE + ) + } else if (source == "hwsd"){ # Get HWSD soil data. year_start and year_end not required @@ -893,8 +909,14 @@ ingest <- function( layer = settings$layer, dir = dir)) %>% purrr::reduce(left_join, by = c("lon", "lat")) %>% distinct() %>% - right_join(dplyr::select( - siteinfo, sitename, lon, lat), by = c("lon", "lat")) %>% + right_join( + dplyr::select(all_of( + siteinfo, + sitename, + lon, + lat + )), + by = c("lon", "lat")) %>% dplyr::select(-lon, -lat) } else if (source == "gsde"){ @@ -941,7 +963,7 @@ ingest <- function( stop( "ingest(): Argument 'source' could not be identified. Use one of 'fluxnet', 'cru', 'watch_wfdei', 'wfde5', - co2_mlo', 'etopo1', or 'gee'.") + co2_mlo', 'etopo1', 'stocker23', or 'gee'.") } ddf <- ddf %>% diff --git a/R/ingest_bysite.R b/R/ingest_bysite.R index 64d223c..72c1e4d 100644 --- a/R/ingest_bysite.R +++ b/R/ingest_bysite.R @@ -66,6 +66,7 @@ ingest_bysite <- function( if (!(source %in% c( "etopo1", + "stocker23", "hwsd", "soilgrids", "wise", @@ -640,6 +641,25 @@ ingest_bysite <- function( verbose = FALSE ) + } else if (source == "stocker23"){ + + # Get ETOPO1 elevation data. year_start and year_end not required + + siteinfo <- tibble( + sitename = sitename, + lon = lon, + lat = lat + ) + + df <- ingest_globalfields( + siteinfo, + source = source, + getvars = NULL, + dir = dir, + timescale = NULL, + verbose = FALSE + ) + } else if (source == "hwsd"){ # Get HWSD soil data. year_start and year_end not required @@ -784,11 +804,11 @@ ingest_bysite <- function( rlang::warn(paste("you selected source =", source)) stop("ingest(): Argument 'source' could not be identified. Use one of 'fluxnet', 'cru', 'watch_wfdei', 'wfde5', - 'co2_mlo', 'etopo1', or 'gee'.") + 'co2_mlo', 'etopo1', 'stocker23', or 'gee'.") } # add data frame to nice data frame containing all required time steps - if (!(source %in% c("etopo1", "hwsd", "soilgrids", "wise", "gsde", "worldclim"))){ + if (!(source %in% c("etopo1", "stocker23", "hwsd", "soilgrids", "wise", "gsde", "worldclim"))){ if (timescale=="m"){ df <- df_tmp %>% mutate(month = lubridate::month(date), year = lubridate::year(date)) %>% diff --git a/R/ingest_globalfields.R b/R/ingest_globalfields.R index b33b231..374c2c7 100644 --- a/R/ingest_globalfields.R +++ b/R/ingest_globalfields.R @@ -52,7 +52,7 @@ ingest_globalfields <- function( stop("At least one entry for siteinfo$sitename is missing.") } - if (!(source %in% c("etopo1", "wwf", "gsde", "worldclim"))){ + if (!(source %in% c("etopo1", "stocker23", "wwf", "gsde", "worldclim"))){ # get a daily (monthly) data frame with all dates for all sites # (if monthly, day 15 of each month) @@ -424,12 +424,33 @@ ingest_globalfields <- function( lat = siteinfo$lat ) - df_out <- extract_pointdata_allsites( paste0(dir, filename), df_lonlat, get_time = FALSE ) %>% - dplyr::select(-lon, -lat) %>% - tidyr::unnest(data) %>% - dplyr::rename(elv = V1) %>% + df_out <- extract_pointdata_allsites( paste0(dir, filename), df_lonlat, get_time = FALSE ) |> + dplyr::ungroup() |> + dplyr::select(-lon, -lat) |> + tidyr::unnest(data) |> + dplyr::rename(elv = ETOPO1_Bed_g_geotiff) |> dplyr::select(sitename, elv) + } else if (source == "stocker23"){ + + filename <- list.files(dir, pattern = "cwdx80_forcing_halfdeg.nc") + if (length(filename) > 1) stop("ingest_globalfields(): Found more than 1 file for source 'stocker23'.") + if (length(filename) == 0) stop("ingest_globalfields(): Found no files for source 'stocker23' in the directory provided by argument 'dir'.") + + # re-construct this data frame (tibble) - otherwise SpatialPointsDataframe() won't work + df_lonlat <- tibble( + sitename = siteinfo$sitename, + lon = siteinfo$lon, + lat = siteinfo$lat + ) + + df_out <- extract_pointdata_allsites( paste0(dir, filename), df_lonlat, get_time = FALSE ) |> + dplyr::ungroup() |> + dplyr::select(-lon, -lat) |> + tidyr::unnest(data) |> + dplyr::rename(whc = cwdx80_forcing) |> + dplyr::select(sitename, whc) + } else if (source == "gsde"){ # re-construct this data frame (tibble) - otherwise SpatialPointsDataframe() won't work @@ -611,15 +632,15 @@ ingest_globalfields_watch_byvar <- function( ddf, siteinfo, dir, varnam ) { rowwise() %>% dplyr::mutate(filename = paste0( dirn, "/", varnam, addstring, sprintf( "%4d", yr ), sprintf( "%02d", mo ), ".nc" )) %>% ungroup() %>% - dplyr::mutate(data = purrr::map(filename, ~extract_pointdata_allsites(., df_lonlat, get_time = FALSE ) )) + dplyr::mutate(data = purrr::map(filename, ~extract_pointdata_allsites(., df_lonlat, get_time = TRUE ) )) # rearrange to a daily data frame complement_df <- function(df){ - df <- df %>% - stats::setNames(., c("myvar")) %>% - mutate( dom = 1:nrow(.)) + df <- df |> + dplyr::select(dom = tstep, myvar = value) return(df) } + ddf <- df %>% tidyr::unnest(data) %>% dplyr::mutate(data = purrr::map(data, ~complement_df(.))) %>% @@ -1122,33 +1143,68 @@ extract_pointdata_allsites <- function( # load file using the raster library #print(paste("Creating raster brick from file", filename)) if (!file.exists(filename)) stop(paste0("File not found: ", filename)) + # message(paste0("Reading file: ", filename)) - rasta <- raster::brick(filename) - - df_lonlat <- raster::extract( - rasta, - sp::SpatialPoints(dplyr::select(df_lonlat, lon, lat)), # , proj4string = rasta@crs - sp = TRUE - ) %>% - as_tibble() %>% - tidyr::nest(data = c(-lon, -lat)) %>% - right_join(df_lonlat, by = c("lon", "lat")) %>% - mutate( data = purrr::map(data, ~dplyr::slice(., 1)) ) %>% - dplyr::mutate(data = purrr::map(data, ~t(.))) %>% - dplyr::mutate(data = purrr::map(data, ~as_tibble(.))) - - # xxx todo: use argument df = TRUE in the extract() function call in order to - # return a data frame directly (and not having to rearrange the data afterwards) - # xxx todo: implement the GWR method for interpolating using elevation as a - # covariate here. + + # new code with terra library + rasta <- terra::rast(filename) + coords <- dplyr::select(df_lonlat, lon, lat) + points <- terra::vect(coords, geom = c("lon", "lat"), crs = "EPSG:4326") + values <- terra::extract(rasta, points, xy = FALSE, ID = FALSE, method = "bilinear") if (get_time){ - timevals <- raster::getZ(rasta) - df_lonlat <- df_lonlat %>% - mutate( data = purrr::map(data, ~bind_cols(., tibble(date = timevals)))) + + out <- df_lonlat |> + dplyr::select(sitename, lon, lat) |> + bind_cols( + values + ) |> + tidyr::pivot_longer(-one_of(c("lon", "lat", "sitename")), names_to = "tstep") |> + tidyr::separate_wider_delim( + tstep, + delim = "=", + names = c("varnam", "tstep") + ) |> + dplyr::mutate( + tstep = as.numeric(tstep) + 1, + varnam = stringr::str_remove(varnam, "_tstep") + ) |> + dplyr::group_by(sitename, lon, lat) |> + tidyr::nest() + + } else { + + out <- df_lonlat |> + dplyr::select(sitename, lon, lat) |> + bind_cols( + values + ) |> + dplyr::group_by(sitename, lon, lat) |> + tidyr::nest() + } - return(df_lonlat) + # # old code with {raster} library: + # rasta <- raster::brick(filename) + # df_lonlat <- raster::extract( + # rasta, + # sp::SpatialPoints(dplyr::select(df_lonlat, lon, lat)), # , proj4string = rasta@crs + # sp = TRUE + # ) %>% + # as_tibble() %>% + # tidyr::nest(data = c(-lon, -lat)) %>% + # right_join(df_lonlat, by = c("lon", "lat")) %>% + # mutate( data = purrr::map(data, ~dplyr::slice(., 1)) ) %>% + # dplyr::mutate(data = purrr::map(data, ~t(.))) %>% + # dplyr::mutate(data = purrr::map(data, ~as_tibble(.))) + # + # if (get_time){ + # timevals <- raster::getZ(rasta) + # df_lonlat <- df_lonlat %>% + # mutate( data = purrr::map(data, ~bind_cols(., tibble(date = timevals)))) + # } + + return(out) } diff --git a/R/ingest_modis_bysite.R b/R/ingest_modis_bysite.R index 24c990f..0618d21 100644 --- a/R/ingest_modis_bysite.R +++ b/R/ingest_modis_bysite.R @@ -71,6 +71,7 @@ ingest_modis_bysite <- function( ) } else { + filnam_daily_csv <- file.path( dirnam_daily_csv, paste0( diff --git a/vignettes/example.Rmd b/vignettes/example.Rmd index 68557db..d6ab8f5 100644 --- a/vignettes/example.Rmd +++ b/vignettes/example.Rmd @@ -178,12 +178,12 @@ df_watch <- ingest_bysite( getvars = c("temp"), dir = "~/data/watch_wfdei/", timescale = "d", - year_start = 2007, - year_end = 2007, + year_start = 2018, + year_end = 2018, lon = 3.5958, lat = 43.7414, - verbose = TRUE, - settings = list(correct_bias = "worldclim", dir_bias = "~/data/worldclim") + verbose = TRUE + #settings = list(correct_bias = "worldclim", dir_bias = "~/data/worldclim") ) df_watch ``` @@ -855,11 +855,16 @@ WATCH-WFDEI data is available for years from 1979. If `year_start` is before tha ```{r warning=FALSE, eval = FALSE} ddf_watch <- ingest( - siteinfo = siteinfo %>% slice(1:2), + siteinfo = siteinfo %>% + slice(1:2) |> + mutate( + year_star = 2018, + year_end = 2018 + ), source = "watch_wfdei", getvars = c("temp", "prec"), - dir = "~/data/watch_wfdei/", # adjust this with your local path - settings = list(correct_bias = "worldclim", dir_bias = "~/data/worldclim") + dir = "~/data/watch_wfdei/" # adjust this with your local path + # settings = list(correct_bias = "worldclim", dir_bias = "~/data/worldclim") ) ``` @@ -1022,6 +1027,19 @@ df_etopo <- ingest( ) ``` +## Root-zone water-storage capacity + +This reads from the 0.05 degrees resolution map of root-zone water-storage capacity from [Stocker et al. (2023)](https://www.nature.com/articles/s41561-023-01125-2). The nested data column contains a tibble with one value for one variable `whc`. Download the data from [here](https://doi.org/10.5281/zenodo.10885724) and specify the local path with the argument `dir`. + + +```{r warning=FALSE, eval = FALSE} +df_stocker23 <- ingest( + siteinfo, + source = "stocker23", + dir ="~/data/mct_data/" # adjust this with your local path +) +``` + ## HWSD Four steps are required before you can use `ingest()` to get HWSD data: