-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathio.R
35 lines (26 loc) · 946 Bytes
/
io.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# ------------------------------------------------------------------------------
# I/O operations
#
# Author: Mario Loaiciga
# ------------------------------------------------------------------------------
library(sparklyr)
load_csv <- function(filename) {
  # Read a comma-separated file from `filename` into a local data frame.
  #
  # Args:
  #   filename: path of the .csv file, handed to read.csv() as `file`.
  #
  # Returns:
  #   The data frame produced by read.csv(); apart from the explicit comma
  #   separator, read.csv()'s defaults apply.
  read.csv(file = filename, sep = ",")
}
load_distributed_csv <- function(filename, sc, name = NULL) {
  # Read a .csv file through a Spark connection using sparklyr.
  #
  # Args:
  #   filename: location of the .csv file, passed to spark_read_csv() as `path`.
  #   sc:       Spark connection, passed as spark_read_csv()'s first argument.
  #   name:     forwarded unchanged as spark_read_csv()'s `name` argument
  #             (defaults to NULL).
  #
  # Returns:
  #   Whatever spark_read_csv() returns for the loaded data.
  spark_tbl <- spark_read_csv(sc, name = name, path = filename)
  spark_tbl
}
export_csv <- function(df, filename) {
  # Write a local data frame to `filename` as a .csv file.
  #
  # Args:
  #   df:       the data frame to write.
  #   filename: destination path, handed to write.csv() as `file`.
  #
  # Row names are left out of the output (row.names = FALSE). The result of
  # write.csv() is passed back to the caller as-is.
  write.csv(x = df, file = filename, row.names = FALSE)
}
export_distributed_csv <- function(df, filename) {
  # Write a distributed (Spark) dataframe out as .csv via sparklyr.
  #
  # Args:
  #   df:       the distributed dataframe, passed as spark_write_csv()'s `x`.
  #   filename: output location, passed as spark_write_csv()'s `path`.
  #
  # TODO: Find a way to prevent Spark from writing the output in file pieces
  spark_write_csv(x = df, path = filename)
}