Skip to content

Commit

Permalink
create functions for getting bulletins; fix #42
Browse files Browse the repository at this point in the history
  • Loading branch information
ernestguevarra committed Aug 18, 2024
1 parent 3d29d70 commit 3e2258c
Show file tree
Hide file tree
Showing 11 changed files with 250 additions and 138 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ Imports:
httr,
rlang,
rvest,
stringr
stringr,
tibble
Suggests:
spelling,
testthat (>= 3.0.0)
Expand Down
6 changes: 4 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@

export(eq_build_url)
export(eq_data)
export(eq_get_links)
export(eq_get_links_)
export(eq_get_bulletin)
export(eq_get_bulletin_urls)
export(eq_get_bulletins)
export(eq_get_table)
export(eq_process_table)
importFrom(dplyr,bind_rows)
Expand All @@ -20,3 +21,4 @@ importFrom(stringr,str_detect)
importFrom(stringr,str_remove_all)
importFrom(stringr,str_replace_all)
importFrom(stringr,str_to_title)
importFrom(tibble,tibble)
105 changes: 0 additions & 105 deletions R/eq_get.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#'
#' @examples
#' eq_get_table()
#' eq_get_links()
#'
#' @rdname eq_get
#' @export
Expand Down Expand Up @@ -71,107 +70,3 @@ eq_get_table <- function(.url = "https://earthquake.phivolcs.dost.gov.ph/",
}
)
}

#'
#' @rdname eq_get
#' @export
#'

eq_get_links_ <- function(.url) {
## Detect year and month from URL ----
.year <- stringr::str_extract(string = .url, pattern = "[0-9]{4}") |>
as.integer()
.month <- stringr::str_extract(
string = .url, pattern = paste(month.name, collapse = "|")
)

## Quiet down error on SSL ----
httr::config(ssl_verifypeer = 0L) |>
httr::set_config()

## Retrieve links ----
if (.year == 2018 & .month %in% month.name[seq_len(5)]) {
rvest::session(.url) |>
rvest::html_elements(css = "tr td .auto-style49 a") |>
rvest::html_attr(name = "href") |>
(\(x)
{
file.path(
"https:/",
stringr::str_split_fixed(.url, pattern = "/", n = 4)[ , 3],
stringr::str_remove_all(string = x, pattern = "^../../")
)
}
)()
} else {
rvest::session(.url) |>
rvest::html_elements(css = ".auto-style91 a") |>
rvest::html_attr(name = "href") |>
(\(x)
{
file.path(
"https:/",
stringr::str_split_fixed(.url, pattern = "/", n = 4)[ , 3],
stringr::str_remove_all(
string = x, pattern = "^../../|\\\\..\\\\..\\\\"
) |>
stringr::str_replace_all(pattern = "\\\\", replacement = "/")
)
}
)()
}
}

#'
#' @rdname eq_get
#' @export
#'

eq_get_links <- function(.url = "https://earthquake.phivolcs.dost.gov.ph/",
.year = NULL, .month = NULL, latest = TRUE) {
## Build URLs ----
if (is.null(.year) & is.null(.month)) {
if (latest) {
urls <- .url
} else {
urls <- eq_build_url(.url = .url, .year = .year, .month = .month)
}
} else {
urls <- eq_build_url(.url = .url, .year = .year, .month = .month)
}

## Quiet down error on SSL ----
httr::config(ssl_verifypeer = 0L) |>
httr::set_config()

## Retrieve and structure data ----
lapply(
X = urls,
FUN = eq_get_links_
) |>
unlist()

# url_list_names <- urls |>
# (\(x)
# {
# paste(
# stringr::str_extract(
# string = x, pattern = paste(month.name, collapse = "|")
# ),
# stringr::str_extract(
# string = x, pattern = "[0-9]{4}"
# )
# )
# }
# )() |>
# (\(x)ifelse(x == "NA NA", format(Sys.Date(), format = "%B %Y"), x))()
#
# names(url_list) <- url_list_names
#
# url_list
}

#'
#' @rdname eq_get
#' @export
#'
98 changes: 98 additions & 0 deletions R/eq_get_bulletin_urls.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#'
#' Get PHIVOLCS earthquake information bulletins
#'
#' @param .url Base URL for PHIVOLCS earthquake bulletins.
#' @param .year A vector for year (in YYYY format) for which earthquake
#' bulletins are to be retrieved. The earliest year that can be specified is
#' 2018. If set to NULL (default), all years starting from 2018 to present
#' year are used.
#' @param .month A vector for month for which earthquake bulletins are
#' to be retrieved. This can be set as either an integer index (1 for January)
#' or abbreviation (Jan for January) for full name. If set to NULL (default),
#' all months are used.
#' @param latest Logical. Should the latest table of earthquake information be
#' retrieved? Only evaluated if `.year = NULL` and `.month = NULL`. If TRUE
#' (default), table of earthquake information for current year and current
#' month is retrieved. Otherwise, all months for all possible years are
#' retrieved.
#'
#' @returns A character vector of URLs for PHIVOLCS earthquake information
#' bulletins.
#'
#' @examples
#' eq_get_bulletin_urls()
#'
#' @rdname eq_get_bulletin
#' @export
#'

eq_get_bulletin_urls <- function(.url = "https://earthquake.phivolcs.dost.gov.ph/",
.year = NULL, .month = NULL, latest = TRUE) {
## Build URLs ----
if (is.null(.year) & is.null(.month)) {
if (latest) {
urls <- .url
} else {
urls <- eq_build_url(.url = .url, .year = .year, .month = .month)
}
} else {
urls <- eq_build_url(.url = .url, .year = .year, .month = .month)
}

## Quiet down error on SSL ----
httr::config(ssl_verifypeer = 0L) |>
httr::set_config()

## Retrieve URLs ----
lapply(
X = urls,
FUN = eq_get_links_
) |>
unlist()
}


eq_get_links_ <- function(.url) {
## Detect year and month from URL ----
.year <- stringr::str_extract(string = .url, pattern = "[0-9]{4}") |>
as.integer()
.month <- stringr::str_extract(
string = .url, pattern = paste(month.name, collapse = "|")
)

## Quiet down error on SSL ----
httr::config(ssl_verifypeer = 0L) |>
httr::set_config()

## Retrieve links ----
if (.year == 2018 & .month %in% month.name[seq_len(5)]) {
rvest::session(.url) |>
rvest::html_elements(css = "tr td .auto-style49 a") |>
rvest::html_attr(name = "href") |>
(\(x)
{
file.path(
"https:/",
stringr::str_split_fixed(.url, pattern = "/", n = 4)[ , 3],
stringr::str_remove_all(string = x, pattern = "^../../")
)
}
)()
} else {
rvest::session(.url) |>
rvest::html_elements(css = ".auto-style91 a") |>
rvest::html_attr(name = "href") |>
(\(x)
{
file.path(
"https:/",
stringr::str_split_fixed(.url, pattern = "/", n = 4)[ , 3],
stringr::str_remove_all(
string = x, pattern = "^../../|\\\\..\\\\..\\\\"
) |>
stringr::str_replace_all(pattern = "\\\\", replacement = "/")
)
}
)()
}
}
53 changes: 53 additions & 0 deletions R/eq_get_bulletins.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#'
#' Retrieve raw information from PHIVOLCS earthquake information bulletins
#'
#' @param .url A character value or vector of values for PHIVOLCS earthquake
#' information bulletins
#'
#' @returns A tibble of earthquake information from PHIVOLCS bulletins
#'
#' @examples
#' urls <- eq_get_bulletin_urls()
#' eq_get_bulletins(urls[1:3])
#'
#' @rdname eq_get_bulletin
#' @export
#'

eq_get_bulletin <- function(.url) {
rvest::session(url = .url) |>
rvest::html_table() |>
(\(x) x[[1]])() |>
(\(x)
{
tibble::tibble(
date_time = x[2, 4],
bulletin_number = stringr::str_extract(
string = x[1, 1], pattern = "[0-9]{1,}"
),
depth = x[2, 8],
magnitude = x[2, 12],
location = x[2, 6],
origin = x[2, 10],
reported_intensity = x[9, 3],
expect_damage = x[11, 4],
expect_aftershocks = x[11, 6],
date_time_issued = x[11, 8],
prepared_by = x[11, 10]
)
}
)()
}

#'
#' @rdname eq_get
#' @export
#'

eq_get_bulletins <- function(.url) {
lapply(
X = .url,
FUN = eq_get_bulletin
) |>
dplyr::bind_rows()
}
1 change: 1 addition & 0 deletions R/lindol-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,6 @@
#' @importFrom dplyr mutate relocate rename_with bind_rows select everything
#' @importFrom stringr str_to_title str_remove_all str_replace_all str_detect
#' @importFrom rvest session html_table
#' @importFrom tibble tibble
#'
"_PACKAGE"
15 changes: 3 additions & 12 deletions man/eq_get.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

54 changes: 54 additions & 0 deletions man/eq_get_bulletin.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 3e2258c

Please sign in to comment.