diff --git a/inputs/data/raw_data.csv b/inputs/data/raw_data.csv
new file mode 100644
index 00000000..67ca6458
--- /dev/null
+++ b/inputs/data/raw_data.csv
@@ -0,0 +1,2 @@
+first_col, second_col, third_col
+some, raw, data
\ No newline at end of file
diff --git a/inputs/literature/Alexander-ProjectOfStateLevel.pdf b/inputs/literature/Alexander-ProjectOfStateLevel.pdf
new file mode 100644
index 00000000..b359f15d
Binary files /dev/null and b/inputs/literature/Alexander-ProjectOfStateLevel.pdf differ
diff --git a/scripts/01-data_cleaning.R b/scripts/01-data_cleaning.R
new file mode 100644
index 00000000..2a2b49e3
--- /dev/null
+++ b/scripts/01-data_cleaning.R
@@ -0,0 +1,32 @@
+#### Preamble ####
+# Purpose: Clean the survey data downloaded from [...UPDATE ME!!!!!]
+# Author: Rohan Alexander [CHANGE THIS TO YOUR NAME!!!!]
+# Date: 3 January 2021
+# Contact: rohan.alexander@utoronto.ca [PROBABLY CHANGE THIS ALSO!!!!]
+# License: MIT
+# Pre-requisites:
+# - The ACS data must already be downloaded and saved to inputs/data
+# - Remember to gitignore it!
+# - Replace these items with your own
+# Any other information needed?
+
+
+#### Workspace setup ####
+# Work inside an R Project instead of calling setwd().
+library(haven)
+library(tidyverse)
+
+# Load the raw CSV; the relative path resolves against the working directory.
+raw_data <- readr::read_csv("inputs/data/raw_data.csv")
+
+# List the column names before deciding which ones to keep.
+names(raw_data)
+
+# Keep only the variables of interest (adjust to suit your analysis),
+# then remove the raw table from the workspace.
+reduced_data <- raw_data %>%
+  select(first_col, second_col)
+rm(raw_data)
+
+
+#### What's next? ####