-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtache_data_processing.R
49 lines (30 loc) · 1.15 KB
/
tache_data_processing.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
library(magrittr)
source("families/tache/R/tache_data_processing_functions.R")
# Input workbook and output JSON paths (relative paths, resolved against the
# current working directory).
xlsx_file <- "families/tache/xlsx/tache_data_cleaned.xlsx"
data_cleaned_file <- "families/tache/json/tache_data_cleaned.json"
collapsed_file <- "families/tache/json/tache_collapsed_branches.json"

# Read all data ----
# Read the "Individuals" and "Families" sheets of the workbook.
# check.names = FALSE is spelled out in full: `F` is an ordinary variable that
# can be reassigned, `FALSE` is a reserved word.
# NOTE(review): check.names is presumably forwarded to data.frame() so that
# the sheet headers are kept verbatim -- confirm against the xlsx docs.
in_data <- xlsx::read.xlsx2(xlsx_file, "Individuals", check.names = FALSE)
fam_data <- xlsx::read.xlsx2(xlsx_file, "Families", check.names = FALSE)

# NAs
# Treat empty-string cells as missing values.
in_data[in_data == ""] <- NA
# Coerce the Collapse flag to a strict TRUE/FALSE vector: `%in% TRUE` turns
# anything as.logical() cannot parse (NA) into FALSE instead of leaving NA.
fam_data[["Collapse"]] <- as.logical(fam_data[["Collapse"]]) %in% TRUE

# Unique IDs
# NOTE(review): qa() is not defined in this file (presumably it comes from the
# sourced functions file and checks ID uniqueness) -- confirm.
in_data %>% qa
# Cleaned data ----
# Apply the cleaning helpers to the individuals table in a fixed order:
# locations first, then title/name, then initial positions.
# NOTE(review): the helpers are not defined in this file (presumably they come
# from the sourced functions file) -- see their definitions for details.
in_data <- in_data %>%
  merge_locations() %>%
  merge_title_with_name() %>%
  initial_positions()

# Populate JSON file
# Pass the table through cleaned_data() and write the result to
# `data_cleaned_file` as pretty-printed JSON with length-1 vectors unboxed.
in_data %>%
  cleaned_data() %>%
  jsonlite::write_json(
    data_cleaned_file,
    pretty = TRUE,
    auto_unbox = TRUE
  )
# Collapsed branches ----
# For every family whose Collapse flag is set, count the individuals whose
# FAMC value matches that family's ID, then write the counts to
# `collapsed_file` as JSON.
#
# vapply() replaces sapply() so that each count is guaranteed to be a single
# integer regardless of the input. Like sapply(), it names the result by the
# IDs when they are a character vector.
# in_data[["FAMC"]] is used instead of in_data$FAMC because `$` on a data frame
# allows partial name matching. sum() over the membership mask gives the row
# count without building a temporary data frame.
collapsed_branches <- as.list(vapply(
  fam_data[fam_data[["Collapse"]], "ID"],
  function(id) {
    sum(in_data[["FAMC"]] %in% id)
  },
  integer(1)
))
jsonlite::write_json(
  collapsed_branches,
  collapsed_file,
  pretty = TRUE,
  auto_unbox = TRUE
)