Skip to content

Commit

Permalink
modularise the get and process functions; fix #52
Browse files Browse the repository at this point in the history
  • Loading branch information
ernestguevarra committed Aug 19, 2024
1 parent 7b80dfc commit 700db3a
Show file tree
Hide file tree
Showing 8 changed files with 105 additions and 83 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@ export(eq_get_bulletin_urls)
export(eq_get_bulletin_urls_)
export(eq_get_bulletins)
export(eq_get_table)
export(eq_get_tables)
export(eq_process_bulletins)
export(eq_process_table)
export(eq_process_tables)
importFrom(dplyr,across)
importFrom(dplyr,bind_rows)
importFrom(dplyr,everything)
Expand Down
4 changes: 2 additions & 2 deletions R/eq_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@
eq_data_summary <- function(.url = "https://earthquake.phivolcs.dost.gov.ph/",
.year = NULL, .month = NULL, latest = TRUE) {
## Retrieve data tables and process ----
eq_get_table(.url = .url, .year = .year, .month = .month, latest = latest) |>
eq_process_table(simplify = TRUE)
eq_get_tables(.url = .url, .year = .year, .month = .month, latest = latest) |>
eq_process_tables()
}

#'
Expand Down
62 changes: 38 additions & 24 deletions R/eq_get.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@
#' @returns A list of tibble/s of raw earthquake information from PHIVOLCS.
#'
#' @examples
#' eq_get_table()
#' eq_get_tables()
#'
#' @rdname eq_get
#' @rdname eq_get_table
#' @export
#'

eq_get_table <- function(.url = "https://earthquake.phivolcs.dost.gov.ph/",
eq_get_tables <- function(.url = "https://earthquake.phivolcs.dost.gov.ph/",
.year = NULL, .month = NULL, latest = TRUE) {
## Build URLs ----
if (is.null(.year) & is.null(.month)) {
Expand All @@ -45,28 +45,42 @@ eq_get_table <- function(.url = "https://earthquake.phivolcs.dost.gov.ph/",
## Retrieve and structure data ----
lapply(
X = urls,
FUN = function(x) {
rvest::session(x) |>
rvest::html_table() |>
(\(x)
{
df <- x[lapply(X = x, FUN = function(x) nrow(x) > 1) |> unlist()][[1]] |>
dplyr::select(1:6)
FUN = eq_get_table
)
}

#'
#' @rdname eq_get_table
#' @export
#'

if ("X1" %in% names(df)) {
df <- df |>
dplyr::filter(
stringr::str_detect(string = .data$X1, pattern = "[0-9]{2}")
)
}
eq_get_table <- function(.url) {
## Check URL ----
url_error <- httr::http_error(.url)

df
if (url_error) {
NULL

Check warning on line 62 in R/eq_get.R

View check run for this annotation

Codecov / codecov/patch

R/eq_get.R#L62

Added line #L62 was not covered by tests
} else {
rvest::session(.url) |>
rvest::html_table() |>
(\(x)
{
df <- x[lapply(X = x, FUN = function(x) nrow(x) > 1) |> unlist()][[1]] |>
dplyr::select(1:6)

if ("X1" %in% names(df)) {
df <- df |>
dplyr::filter(
stringr::str_detect(string = .data$X1, pattern = "[0-9]{2}")
)
}
)() |>
dplyr::mutate(
dplyr::across(.cols = dplyr::everything(), .fns = ~as.character(.x)),
date_time_retrieved = Sys.time(), .before = 1
)
}
)

df
}
)() |>
dplyr::mutate(
dplyr::across(.cols = dplyr::everything(), .fns = ~as.character(.x)),
date_time_retrieved = Sys.time(), .before = 1
)
}
}
78 changes: 41 additions & 37 deletions R/eq_process.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,51 +3,55 @@
#'
#' @param eq_data_list A list of tibble/s of raw earthquake data extracted from
#' PHIVOLCS earthquake information monitoring HTMLs using `eq_get_table`.
#' @param simplify Logical. Should output be simplified into a data.frame or
#' tibble? Default is TRUE. Otherwise, a list of processed tibbles of
#' earthquake monitoring data.
#'
#' @returns A tibble of processed earthquake data.
#' @returns A tibble of processed earthquake data summaries.
#'
#' @examples
#' eq_tab <- eq_get_table()
#' eq_process_table(eq_tab)
#' eq_tab <- eq_get_tables()
#' eq_process_tables(eq_tab)
#'
#' @rdname eq_process_table
#' @export
#'

eq_process_table <- function(eq_data_list, simplify = TRUE) {
df <- lapply(
eq_process_tables <- function(eq_data_list) {
lapply(
X = eq_data_list,
FUN = function(x) {
x |>
dplyr::rename_with(
.fn = function(x) c(
"date_time_retrieved", "date_time",
"latitude", "longitude",
"depth", "magnitude", "location"
)
) |>
dplyr::mutate(
date_time = strptime(
.data$date_time, format = "%d %B %Y - %I:%M %p", tz = "PST"
),
dplyr::across(
.cols = .data$latitude:.data$magnitude,
.fns = ~suppressWarnings(as.numeric(.x))
),
location = stringr::str_remove_all(
string = .data$location, pattern = "\n\t\t\\s+|\r|\n\\s+"
) |>
stringr::str_replace_all(pattern = "^0+", replacement = "") |>
stringr::str_replace_all(pattern = "km| km", replacement = " km")
)
}
)
FUN = eq_process_table
) |>
dplyr::bind_rows()
}

if (simplify) {
df <- dplyr::bind_rows(df)
}
#'
#' @rdname eq_process_table
#' @export
#'

df
eq_process_table <- function(eq_data_list) {
if (is.null(eq_data_list)) {
NULL

Check warning on line 32 in R/eq_process.R

View check run for this annotation

Codecov / codecov/patch

R/eq_process.R#L32

Added line #L32 was not covered by tests
} else {
eq_data_list |>
dplyr::rename_with(
.fn = function(x) c(
"date_time_retrieved", "date_time",
"latitude", "longitude",
"depth", "magnitude", "location"
)
) |>
dplyr::mutate(
date_time = strptime(
.data$date_time, format = "%d %B %Y - %I:%M %p", tz = "PST"
),
dplyr::across(
.cols = .data$latitude:.data$magnitude,
.fns = ~suppressWarnings(as.numeric(.x))
),
location = stringr::str_remove_all(
string = .data$location, pattern = "\n\t\t\\s+|\r|\n\\s+"
) |>
stringr::str_replace_all(pattern = "^0+", replacement = "") |>
stringr::str_replace_all(pattern = "km| km", replacement = " km")
)
}
}
9 changes: 6 additions & 3 deletions man/eq_get.Rd → man/eq_get_table.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 8 additions & 9 deletions man/eq_process_table.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions tests/testthat/test-eq_get.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Tests for get table function -------------------------------------------------

tab_list <- eq_get_table()
tab_list <- eq_get_tables()

test_that("get table function outputs are as expected", {
expect_type(tab_list, "list")
Expand All @@ -10,7 +10,7 @@ test_that("tables are of the correct structure", {
expect_vector(tab_list, size = 1)
})

tab_list <- eq_get_table(latest = FALSE)
tab_list <- eq_get_tables(latest = FALSE)

current_month <- format(Sys.Date(), "%B")
size <- 72 + (1:12)[month.name == current_month]
Expand All @@ -23,7 +23,7 @@ test_that("tables are of the correct structure", {
expect_vector(tab_list, size = size)
})

tab_list <- eq_get_table(.year = 2020)
tab_list <- eq_get_tables(.year = 2020)

test_that("get table function outputs are as expected", {
expect_type(tab_list, "list")
Expand All @@ -33,7 +33,7 @@ test_that("tables are of the correct structure", {
expect_vector(tab_list, size = 12)
})

tab_list <- eq_get_table(.year = 2020, .month = 1)
tab_list <- eq_get_tables(.year = 2020, .month = 1)

test_that("get table function outputs are as expected", {
expect_type(tab_list, "list")
Expand Down
8 changes: 4 additions & 4 deletions tests/testthat/test-eq_process.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Tests for eq_process function ------------------------------------------------

df <- eq_get_table() |>
eq_process_table()
df <- eq_get_tables() |>
eq_process_tables()

test_that("eq_process outputs are appropriate", {
expect_s3_class(df, "tbl")
Expand All @@ -22,8 +22,8 @@ test_that("eq_process outputs are appropriate", {
})


df <- eq_get_table(.year = 2020) |>
eq_process_table()
df <- eq_get_tables(.year = 2020) |>
eq_process_tables()

test_that("eq_process outputs are appropriate", {
expect_s3_class(df, "tbl")
Expand Down

0 comments on commit 700db3a

Please sign in to comment.