Skip to content

Commit

Permalink
Support for system catalogs
Browse files Browse the repository at this point in the history
Full history: 699813b

refactor `dbListTables()` with `list_tables()`, now orders result by `table_type` and `table_name`
refactor `dbExistsTable()` with `list_tables()`
refactor `dbListObjects()` with `list_tables()`
merge `find_table()` code into `list_fields()`
`find_table()` is no longer used anywhere else (e.g. by `exists_table()`)
simplify the "get current_schemas() as table" code
pass full `id` to `list_fields()`
align `dbExistsTable()` with `dbListFields()`
add some comments and whitespace
simplify `where_schema` in `list_tables()`
align `where_table` with `where_schema` in `list_tables()`
add `system_catalogs` argument to `dbConnect()`
add materialized view tests
`list_tables()`: query system catalogs if available
`list_fields()`: query system catalogs if available
  • Loading branch information
dpprdan authored and krlmlr committed Apr 1, 2024
1 parent d689f88 commit 0524d53
Show file tree
Hide file tree
Showing 14 changed files with 315 additions and 52 deletions.
1 change: 1 addition & 0 deletions R/PqConnection.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ setClass("PqConnection",
slots = list(
ptr = "externalptr",
bigint = "character",
system_catalogs = "logical",
timezone = "character",
timezone_out = "character",
typnames = "data.frame"
Expand Down
14 changes: 12 additions & 2 deletions R/dbConnect_PqDriver.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,13 @@
#' @param check_interrupts Should user interrupts be checked during the query execution (before
#' first row of data is available)? Setting to `TRUE` allows interruption of queries
#' running too long.
#' @param system_catalogs Should `dbList*()` functions query the [`system
#' catalogs`](https://www.postgresql.org/docs/current/catalogs.html) (`TRUE`,
#' the default) or the
#' [`information_schema`](https://www.postgresql.org/docs/current/information-schema.html)
#' (`FALSE`)?
#' The `information_schema` does not contain PostgreSQL-specific information,
#' in particular [Materialized
#' Views](https://www.postgresql.org/docs/current/sql-creatematerializedview.html).
#' @param timezone Sets the timezone for the connection. The default is `"UTC"`.
#' If `NULL` then no timezone is set, which defaults to the server's time zone.
#' @param timezone_out The time zone returned to R, defaults to `timezone`.
Expand All @@ -60,7 +67,8 @@
dbConnect_PqDriver <- function(drv, dbname = NULL,
host = NULL, port = NULL, password = NULL, user = NULL, service = NULL, ...,
bigint = c("integer64", "integer", "numeric", "character"),
check_interrupts = FALSE, timezone = "UTC", timezone_out = NULL) {
check_interrupts = FALSE, system_catalogs = TRUE,
timezone = "UTC", timezone_out = NULL) {
opts <- unlist(list(
dbname = dbname, user = user, password = password,
host = host, port = as.character(port), service = service, client_encoding = "utf8", ...
Expand All @@ -70,6 +78,7 @@ dbConnect_PqDriver <- function(drv, dbname = NULL,
}
bigint <- match.arg(bigint)
stopifnot(is.logical(check_interrupts), all(!is.na(check_interrupts)), length(check_interrupts) == 1)
stopifnot(is.logical(system_catalogs))
if (!is.null(timezone)) {
stopifnot(is.character(timezone), all(!is.na(timezone)), length(timezone) == 1)
}
Expand All @@ -85,7 +94,8 @@ dbConnect_PqDriver <- function(drv, dbname = NULL,

# timezone is set later
conn <- new("PqConnection",
ptr = ptr, bigint = bigint, timezone = character(), typnames = data.frame()
ptr = ptr, bigint = bigint, system_catalogs = system_catalogs,
timezone = character(), typnames = data.frame()
)
on.exit(dbDisconnect(conn))

Expand Down
17 changes: 16 additions & 1 deletion R/dbConnect_RedshiftDriver.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,22 @@ dbConnect_RedshiftDriver <- function(drv, dbname = NULL,
host = NULL, port = NULL, password = NULL, user = NULL, service = NULL, ...,
bigint = c("integer64", "integer", "numeric", "character"),
check_interrupts = FALSE, timezone = "UTC") {
# Wraps the object returned by the next method (callNextMethod()) in a
# "RedshiftConnection".
# NOTE(review): this hunk is rendered without +/- markers ("16 additions &
# 1 deletion"), so the one-line new() call directly below is presumably the
# removed version and the multi-line new() call after it the added version.
new("RedshiftConnection", callNextMethod())
# Every argument of this function is forwarded by name, plus one extra
# argument (`system_catalogs`) that this signature does not expose.
new("RedshiftConnection",
callNextMethod(
drv = drv,
dbname = dbname,
host = host,
port = port,
password = password,
user = user,
service = service,
...,
bigint = bigint,
check_interrupts = check_interrupts,
# Hard-coded: callers of this function cannot change it.
system_catalogs = FALSE,
timezone = timezone
)
)
}

#' @rdname Redshift
Expand Down
2 changes: 1 addition & 1 deletion R/dbExistsTable_PqConnection_Id.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#' @rdname postgres-tables
#' @usage NULL
dbExistsTable_PqConnection_Id <- function(conn, name, ...) {
# Delegates the existence check for `name` to exists_table().
# NOTE(review): this hunk is rendered without +/- markers ("1 addition &
# 1 deletion"), so the two calls below are presumably the removed and the
# added version of the same line rather than two consecutive calls.
# Presumably removed: forwards only the `name` slot of `name`.
exists_table(conn, id = name@name)
# Presumably added: forwards `name` itself as `id`.
exists_table(conn, id = name)
}

#' @rdname postgres-tables
Expand Down
9 changes: 5 additions & 4 deletions R/dbExistsTable_PqConnection_character.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
#' @usage NULL
dbExistsTable_PqConnection_character <- function(conn, name, ...) {
# Delegates the existence check for a single name to exists_table().
# Only one name is accepted per call.
stopifnot(length(name) == 1L)
# NOTE(review): this hunk is rendered without +/- markers ("5 additions &
# 4 deletions"). The next four lines (quote into `name`, blank line, comment,
# unquote + `@name`) are presumably the removed version; the lines from the
# "roundtrip" comment down to the second `id <-` are presumably the added one.
name <- dbQuoteIdentifier(conn, name)

# Convert to identifier
id <- dbUnquoteIdentifier(conn, name)[[1]]@name
# use (Un)QuoteIdentifier roundtrip instead of Id(table = name)
# so that quoted names (possibly incl. schema) can be passed to `name` e.g.
# name = dbQuoteIdentifier(conn, Id(schema = "sname", table = "tname"))
quoted <- dbQuoteIdentifier(conn, name)
# Keeps the first unquoted element as a whole; the `@name` slot is no longer
# extracted here.
id <- dbUnquoteIdentifier(conn, quoted)[[1]]
exists_table(conn, id)
}

Expand Down
2 changes: 1 addition & 1 deletion R/dbListFields_PqConnection_Id.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#' @rdname postgres-tables
#' @usage NULL
dbListFields_PqConnection_Id <- function(conn, name, ...) {
# Delegates the field listing for `name` to list_fields().
# NOTE(review): this hunk is rendered without +/- markers ("1 addition &
# 1 deletion"), so the two calls below are presumably the removed and the
# added version of the same line rather than two consecutive calls.
# Presumably removed: forwards only the `name` slot, positionally.
list_fields(conn, name@name)
# Presumably added: forwards `name` itself via the `id` argument.
list_fields(conn, id = name)
}

#' @rdname postgres-tables
Expand Down
2 changes: 1 addition & 1 deletion R/dbListFields_PqConnection_character.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#' @usage NULL
dbListFields_PqConnection_character <- function(conn, name, ...) {
# Quotes `name`, unquotes it again, and hands the first unquoted element to
# list_fields().
quoted <- dbQuoteIdentifier(conn, name)
# NOTE(review): this hunk is rendered without +/- markers ("1 addition &
# 1 deletion"), so the two `id <-` lines are presumably the removed version
# (extracting the `@name` slot) and the added version (keeping the whole
# element), not two consecutive assignments.
id <- dbUnquoteIdentifier(conn, quoted)[[1]]@name
id <- dbUnquoteIdentifier(conn, quoted)[[1]]

list_fields(conn, id)
}
Expand Down
25 changes: 19 additions & 6 deletions R/dbListObjects_PqConnection_ANY.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,13 @@ dbListObjects_PqConnection_ANY <- function(conn, prefix = NULL, ...) {
null_varchar <- "NULL::text"
}
query <- paste0(
"SELECT ", null_varchar, " AS schema, table_name AS table FROM INFORMATION_SCHEMA.tables\n",
"WHERE (table_schema = ANY(current_schemas(true))) AND (table_schema <> 'pg_catalog')\n",
"SELECT ", null_varchar, " AS schema, table_name AS table FROM ( \n",
list_tables(conn = conn, order_by = "table_type, table_name"),
") as table_query \n",
"UNION ALL\n",
"SELECT DISTINCT table_schema AS schema, ", null_varchar, " AS table FROM INFORMATION_SCHEMA.tables"
"SELECT DISTINCT table_schema AS schema, ", null_varchar, " AS table FROM ( \n",
list_tables(conn = conn, where_schema = "true"),
") as schema_query;"
)
} else {
if (!is.list(prefix)) prefix <- list(prefix)
Expand All @@ -27,10 +30,20 @@ dbListObjects_PqConnection_ANY <- function(conn, prefix = NULL, ...) {
schemas <- vcapply(prefix[is_prefix], function(x) x@name[["schema"]])
if (length(schemas) > 0) {
schema_strings <- dbQuoteString(conn, schemas)
where_schema <-
paste0(
"table_schema IN (",
paste(schema_strings, collapse = ", "),
") \n"
)
query <- paste0(
"SELECT table_schema AS schema, table_name AS table FROM INFORMATION_SCHEMA.tables\n",
"WHERE ",
"(table_schema IN (", paste(schema_strings, collapse = ", "), "))"
"SELECT table_schema AS schema, table_name AS table FROM ( \n",
list_tables(
conn = conn,
where_schema = where_schema,
order_by = "table_type, table_name"
),
") as table_query"
)
}
}
Expand Down
9 changes: 3 additions & 6 deletions R/dbListTables_PqConnection.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
#' @rdname postgres-tables
#' @usage NULL
dbListTables_PqConnection <- function(conn, ...) {
# Runs a table-listing query on `conn` and returns one column of the result.
# NOTE(review): this hunk is rendered without +/- markers ("3 additions &
# 6 deletions"). The paste0() query and the `[[1]]` extraction are presumably
# the removed version; the list_tables() query and the `[["table_name"]]`
# extraction are presumably the added version.
query <- paste0(
"SELECT table_name FROM INFORMATION_SCHEMA.tables ",
"WHERE ",
"(table_schema = ANY(current_schemas(true))) AND (table_schema <> 'pg_catalog')"
)
dbGetQuery(conn, query)[[1]]
# The query text comes from list_tables(), with ordering requested by
# table_type and then table_name.
query <- list_tables(conn = conn, order_by = "table_type, table_name")

# Selects the result column by name instead of by position.
dbGetQuery(conn, query)[["table_name"]]
}

#' @rdname postgres-tables
Expand Down
Loading

0 comments on commit 0524d53

Please sign in to comment.