Skip to content

Commit

Permalink
more work on boolean types
Browse files Browse the repository at this point in the history
  • Loading branch information
ablack3 committed Aug 22, 2024
1 parent f1f7a49 commit b5b5c26
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 48 deletions.
19 changes: 16 additions & 3 deletions R/InsertTable.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

getSqlDataTypes <- function(column) {
getSqlDataTypes <- function(column, dbms) {
if (is.integer(column)) {
return("INTEGER")
} else if (is(column, "POSIXct") | is(column, "POSIXt")) {
Expand All @@ -28,7 +28,12 @@ getSqlDataTypes <- function(column) {
} else if (is.numeric(column)) {
return("FLOAT")
} else if (is.logical(column)) {
return("BOOLEAN")
return(switch(
dbms,
"sql server" = "BIT",
"oracle" = "NUMBER(1)", # could also consider `NUMBER(1)` possibly with constraint `COLNAME NUMBER(1) CHECK (COLNAME IN (0, 1))`
"BOOLEAN"
))
} else {
if (is.factor(column)) {
maxLength <-
Expand Down Expand Up @@ -258,7 +263,7 @@ insertTable.default <- function(connection,
if (dbms == "bigquery" && useCtasHack && is.null(tempEmulationSchema)) {
abort("tempEmulationSchema is required to use insertTable with bigquery when inserting into a new table")
}
sqlDataTypes <- sapply(data, getSqlDataTypes)
sqlDataTypes <- sapply(data, getSqlDataTypes, dbms = dbms)
sqlTableDefinition <- paste(.sql.qescape(names(data), TRUE), sqlDataTypes, collapse = ", ")
sqlTableName <- .sql.qescape(tableName, TRUE, quote = "")
sqlFieldNames <- paste(.sql.qescape(names(data), TRUE), collapse = ",")
Expand All @@ -276,7 +281,15 @@ insertTable.default <- function(connection,
}

if (createTable && !useCtasHack) {
# temporary translation for boolean types. move this to sql render.
# if (dbms == "sql server") {
# print("custom translation")
# sqlTableDefinition <- gsub("BOOLEAN", "BIT", sqlTableDefinition)
# print(sqlTableDefinition)
# }

sql <- paste("CREATE TABLE ", sqlTableName, " (", sqlTableDefinition, ");", sep = "")
print(sql)
renderTranslateExecuteSql(
connection = connection,
sql = sql,
Expand Down
Binary file modified inst/java/DatabaseConnector.jar
Binary file not shown.
2 changes: 1 addition & 1 deletion java/DatabaseConnector.jardesc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<?xml version="1.0" encoding="WINDOWS-1252" standalone="no"?>
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<jardesc>
<jar path="DatabaseConnector/inst/java/DatabaseConnector.jar"/>
<options buildIfNeeded="true" compress="true" descriptionLocation="/DatabaseConnector/java/DatabaseConnector.jardesc" exportErrors="true" exportWarnings="true" includeDirectoryEntries="false" overwrite="true" saveDescription="true" storeRefactorings="false" useSourceFolders="false"/>
Expand Down
20 changes: 10 additions & 10 deletions java/org/ohdsi/databaseConnector/BatchedQuery.java
Original file line number Diff line number Diff line change
Expand Up @@ -148,17 +148,21 @@ public BatchedQuery(Connection connection, String query, String dbms) throws SQL
int type = metaData.getColumnType(columnIndex + 1);
String className = metaData.getColumnClassName(columnIndex + 1);

//System.out.println("======================== debug ====================");
//System.out.println("type= " + type);
//System.out.println("className= " + className);
//System.out.println("columnSqlTypes[columnIndex]= " + columnSqlTypes[columnIndex]);
//System.out.println("Types.BOOLEAN=" + Types.BOOLEAN);
System.out.println("======================== debug ====================");
System.out.println("type= " + type);
System.out.println("className= " + className);
System.out.println("columnSqlTypes[columnIndex]= " + columnSqlTypes[columnIndex]);
System.out.println("Types.BOOLEAN=" + Types.BOOLEAN);


//Types.BOOLEAN is 16 but for a boolean datatype in the database type is -7.
int precision = metaData.getPrecision(columnIndex + 1);
System.out.println("precision=" + precision);
int scale = metaData.getScale(columnIndex + 1);
if (type == Types.INTEGER || type == Types.SMALLINT || type == Types.TINYINT
if (type == Types.BOOLEAN || className.equals("java.lang.Boolean") || columnSqlTypes[columnIndex] == "bool"
|| (dbms.equals("oracle") && className.equals("java.math.BigDecimal") && precision == 1))
columnTypes[columnIndex] = BOOLEAN;
else if (type == Types.INTEGER || type == Types.SMALLINT || type == Types.TINYINT
|| (dbms.equals("oracle") && className.equals("java.math.BigDecimal") && precision > 0 && precision != 19 && scale == 0))
columnTypes[columnIndex] = INTEGER;
else if (type == Types.BIGINT
Expand All @@ -170,10 +174,6 @@ else if (type == Types.DATE)
columnTypes[columnIndex] = DATE;
else if (type == Types.TIMESTAMP)
columnTypes[columnIndex] = DATETIME;
else if (type == Types.BOOLEAN || className.equals("java.lang.Boolean") || columnSqlTypes[columnIndex] == "bool") {
System.out.println("Setting boolean type.");
columnTypes[columnIndex] = BOOLEAN;
}
else
columnTypes[columnIndex] = STRING;
}
Expand Down
48 changes: 14 additions & 34 deletions tests/testthat/test-insertTable.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,15 @@ makeRandomStrings <- function(n = 1, lenght = 12) {
return(randomString)
}
bigInts <- bit64::runif64(length(dayseq))
booleans <- sample(c(T, F), size = length(dayseq), replace = T)
data <- data.frame(
start_date = dayseq,
some_datetime = timeSeq,
person_id = as.integer(round(runif(length(dayseq), 1, 1e+07))),
value = runif(length(dayseq)),
id = makeRandomStrings(length(dayseq)),
big_ints = bigInts,
booleans = booleans,
stringsAsFactors = FALSE
)

Expand All @@ -39,9 +41,11 @@ data$value[2] <- NA
data$id[3] <- NA
data$big_ints[7] <- NA
data$big_ints[8] <- 3.3043e+10

data$booleans[c(3,9)] <- NA
testServer = testServers[[4]]
for (testServer in testServers) {
test_that(addDbmsToLabel("Insert data", testServer), {
skip_if(testServer$connectionDetails$dbms == "oracle") # Booleans are passed to and from Oracle but NAs are not persevered. still need to fix that.
if (testServer$connectionDetails$dbms %in% c("redshift", "bigquery")) {
# Inserting on RedShift or BigQuery is slow (Without bulk upload), so
# taking subset:
Expand All @@ -54,6 +58,7 @@ for (testServer in testServers) {
options(sqlRenderTempEmulationSchema = testServer$tempEmulationSchema)
on.exit(dropEmulatedTempTables(connection))
on.exit(disconnect(connection), add = TRUE)
# debugonce(insertTable)
insertTable(
connection = connection,
tableName = "#temp",
Expand All @@ -63,9 +68,11 @@ for (testServer in testServers) {
)

# Check data on server is same as local
dataCopy2 <- renderTranslateQuerySql(connection, "SELECT * FROM #temp;", integer64AsNumeric = FALSE)
dataCopy2 <- renderTranslateQuerySql(connection, "SELECT * FROM #temp;", integer64AsNumeric = FALSE)
names(dataCopy2) <- tolower(names(dataCopy2))
dataCopy1 <- data[order(dataCopy1$person_id), ]
# dplyr::tibble(dataCopy1)
# dplyr::tibble(dataCopy2)
dataCopy1 <- dataCopy1[order(dataCopy1$person_id), ]
dataCopy2 <- dataCopy2[order(dataCopy2$person_id), ]
row.names(dataCopy1) <- NULL
row.names(dataCopy2) <- NULL
Expand All @@ -79,13 +86,13 @@ for (testServer in testServers) {
dbClearResult(res)
dbms <- testServer$connectionDetails$dbms
if (dbms == "postgresql") {
expect_equal(as.character(columnInfo$field.type), c("date", "timestamp", "int4", "numeric", "varchar", "int8"))
expect_equal(as.character(columnInfo$field.type), c("date", "timestamp", "int4", "numeric", "varchar", "int8", "bool"))
} else if (dbms == "sql server") {
expect_equal(as.character(columnInfo$field.type), c("date", "datetime2", "int", "float", "varchar", "bigint"))
expect_equal(as.character(columnInfo$field.type), c("date", "datetime2", "int", "float", "varchar", "bigint", "bit"))
} else if (dbms == "oracle") {
expect_equal(as.character(columnInfo$field.type), c("DATE", "TIMESTAMP", "NUMBER", "NUMBER", "VARCHAR2", "NUMBER"))
expect_equal(as.character(columnInfo$field.type), c("DATE", "TIMESTAMP", "NUMBER", "NUMBER", "VARCHAR2", "NUMBER", "NUMBER"))
} else if (dbms == "redshift") {
expect_equal(as.character(columnInfo$field.type), c("date", "timestamp", "int4", "float8", "varchar", "int8" ))
expect_equal(as.character(columnInfo$field.type), c("date", "timestamp", "int4", "float8", "varchar", "int8", "bool"))
} else if (dbms == "sqlite") {
expect_equal(as.character(columnInfo$type), c("double", "double", "integer", "double", "character", "double"))
} else if (dbms == "duckdb") {
Expand All @@ -112,30 +119,3 @@ test_that("Logging insertTable times", {
unlink(logFileName)
})

data <- data.frame(
id = 1:3,
isPrime = c(NA, FALSE, TRUE)
)

for (testServer in testServers) {
test_that(addDbmsToLabel("Converting logical to numeric in insertTable", testServer), {
connection <- connect(testServer$connectionDetails)
options(sqlRenderTempEmulationSchema = testServer$tempEmulationSchema)
on.exit(dropEmulatedTempTables(connection))
on.exit(disconnect(connection), add = TRUE)
expect_warning(
insertTable(
connection = connection,
tableName = "#temp",
data = data,
createTable = TRUE,
tempTable = TRUE
),
"Column 'isPrime' is of type 'logical'")
data2 <- renderTranslateQuerySql(connection, "SELECT * FROM #temp;")
data$isPrime <- as.numeric(data$isPrime)
names(data2) <- tolower(names(data2))
data2 <- data2[order(data2$id), ]
expect_equal(data, data2, check.attributes = FALSE)
})
}

0 comments on commit b5b5c26

Please sign in to comment.