diff --git a/inputs/data/raw_data.csv b/inputs/data/raw_data.csv
new file mode 100644
index 00000000..67ca6458
--- /dev/null
+++ b/inputs/data/raw_data.csv
@@ -0,0 +1,2 @@
+first_col, second_col, third_col
+some, raw, data
\ No newline at end of file
diff --git a/inputs/literature/Alexander-ProjectOfStateLevel.pdf b/inputs/literature/Alexander-ProjectOfStateLevel.pdf
new file mode 100644
index 00000000..b359f15d
Binary files /dev/null and b/inputs/literature/Alexander-ProjectOfStateLevel.pdf differ
diff --git a/scripts/01-data_cleaning.R b/scripts/01-data_cleaning.R
new file mode 100644
index 00000000..2a2b49e3
--- /dev/null
+++ b/scripts/01-data_cleaning.R
@@ -0,0 +1,36 @@
+#### Preamble ####
+# Purpose: Clean the survey data downloaded from [...UPDATE ME!!!!!]
+# Author: Rohan Alexander [CHANGE THIS TO YOUR NAME!!!!]
+# Date: 3 January 2021
+# Contact: rohan.alexander@utoronto.ca [PROBABLY CHANGE THIS ALSO!!!!]
+# License: MIT
+# Pre-requisites: 
+# - Need to have downloaded the ACS data and saved it to inputs/data
+# - Don't forget to gitignore it!
+# - Change these to yours
+# Any other information needed?
+
+
+#### Workspace setup ####
+# Use R Projects, not setwd().
+library(haven)
+library(tidyverse)
+# Load the raw CSV (path is relative to the working directory).
+raw_data <-
+  readr::read_csv("inputs/data/raw_data.csv")
+# Inspect the available column names, then keep only the variables of
+# interest (adjust the selection to suit your analysis).
+names(raw_data)
+
+reduced_data <- select(
+  raw_data,
+  first_col, second_col
+)
+rm(raw_data)
+         
+
+#### What's next? ####
+
+
+
+         
\ No newline at end of file