Add Lecture 21 script and simplify notation/terms in Lecture 22.
brianlukoff committed Apr 21, 2021
1 parent 2cb3f78 commit 78702a9
Showing 5 changed files with 20 additions and 11 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -22,7 +22,7 @@
| Lecture 18: Indicator Variables and Interactions | [Slides](slides/lecture-18/lecture-18.pdf) | [Script](lecture-scripts/lecture-18.R) |
| Lecture 19: Logistic Regression 1 | [Slides](slides/lecture-19/lecture-19.pdf) | [Script](lecture-scripts/lecture-19.R) |
| Lecture 20: Logistic Regression 2 | [Slides](slides/lecture-20/lecture-20.pdf) | [Script](lecture-scripts/lecture-20.R) |
| Lecture 21: Problems with p-values | [Slides](slides/lecture-21/lecture-21.pdf) | |
| Lecture 21: Problems with p-values | [Slides](slides/lecture-21/lecture-21.pdf) | [Script](lecture-scripts/lecture-21.R) |
| Lecture 22: Training and test sets | [Slides](slides/lecture-22/lecture-22.pdf) | |

# R help pages
7 changes: 7 additions & 0 deletions lecture-scripts/lecture-21.R
@@ -0,0 +1,7 @@
# Load the "mystery" dataset used in the discussion of problems with p-values
mystery <- read.csv("https://raw.githubusercontent.com/brianlukoff/sta371g/master/data/mystery.csv")

# Regress Y on every other variable in the dataset
model <- lm(Y ~ ., data=mystery)
summary(model)

# Refit using only three of the predictors
model2 <- lm(Y ~ X10 + X13 + X16, data=mystery)
summary(model2)
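The mystery data themselves aren't shown here, but the lecture's theme pairs naturally with a quick illustration of why screening many predictors by p-value is risky. A hypothetical sketch (not part of the commit), using pure-noise predictors:

# Hypothetical illustration: with 20 pure-noise predictors, about 1 in 20 will
# look "significant" at the 5% level even though no real relationships exist
set.seed(1)
n <- 100
noise <- as.data.frame(matrix(rnorm(n * 20), ncol=20))
names(noise) <- paste0("X", 1:20)
noise$Y <- rnorm(n)
full <- lm(Y ~ ., data=noise)
p.values <- summary(full)$coefficients[-1, 4]  # p-value column, intercept row dropped
sum(p.values < 0.05)                           # count of spuriously "significant" predictors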
11 changes: 6 additions & 5 deletions slides/lecture-22/lecture-22.Rnw
@@ -327,12 +327,12 @@
@
The training set average error is:
<<>>=
mean(abs(resid(model)))
mean(abs(residuals(model)))
@
The test set average error comes from manually computing the prediction error for each case in the test set:
<<>>=
price.hat <- predict(model, test.set)
mean(abs(test.set$Price - price.hat))
predicted.prices <- predict(model, test.set)
mean(abs(test.set$Price - predicted.prices))
@
\end{frame}
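For context on the training.set and test.set objects used above, here is a minimal sketch of a 70/30 split (hypothetical; the slides' actual split and predictors aren't visible in this hunk, so a houses data frame and the Living.Area/Land.Value predictors from a later frame are assumed):

# Hypothetical sketch of the split (names and seed are assumptions, not from the slides)
set.seed(371)
in.training <- sample(nrow(houses), size=round(0.7 * nrow(houses)))
training.set <- houses[in.training, ]
test.set <- houses[-in.training, ]
model <- lm(Price ~ Living.Area + Land.Value, data=training.set)
mean(abs(residuals(model)))                    # training set average error
predicted.prices <- predict(model, test.set)
mean(abs(test.set$Price - predicted.prices))   # test set average error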
@@ -341,7 +341,7 @@
\item Similarly, we can compare the $R^2$ from the training set to the $R^2$ that we would get by predicting prices for cases in the test set.
\item Recall that $R^2 = \text{cor}(Y,\hat Y)^2$; we can see what $R^2$ would be in the test set by calculating this correlation there:
<<>>=
cor(test.set$Price, price.hat)^2
cor(test.set$Price, predicted.prices)^2
@
\item Compare this to what $R^2$ is in the training set:
@@ -361,7 +361,8 @@
\begin{frame}[fragile]
\fontsm
<<>>=
logmodel <- lm(I(log(Price)) ~ Living.Area + Land.Value,
logPrice <- log(training.set$Price)
logmodel <- lm(logPrice ~ Living.Area + Land.Value,
data=training.set)
predict.training <- exp(predict(logmodel))
predict.test <- exp(predict(logmodel, test.set))
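The rest of this chunk is truncated above. One way the comparison might continue (a sketch, assuming the same average-error metric as the earlier frame) is to score the back-transformed predictions in dollars:

mean(abs(training.set$Price - predict.training))  # training set average error (log model)
mean(abs(test.set$Price - predict.test))          # test set average error (log model)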
Binary file modified slides/lecture-22/lecture-22.pdf
Binary file not shown.
11 changes: 6 additions & 5 deletions slides/lecture-22/lecture-22.tex
@@ -346,7 +346,7 @@
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.137, 0.137, 0.137}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{mean}\hlstd{(}\hlkwd{abs}\hlstd{(}\hlkwd{resid}\hlstd{(model)))}
\hlkwd{mean}\hlstd{(}\hlkwd{abs}\hlstd{(}\hlkwd{residuals}\hlstd{(model)))}
\end{alltt}
\begin{verbatim}
[1] 47685.76
@@ -357,8 +357,8 @@
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.137, 0.137, 0.137}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{price.hat} \hlkwb{<-} \hlkwd{predict}\hlstd{(model, test.set)}
\hlkwd{mean}\hlstd{(}\hlkwd{abs}\hlstd{(test.set}\hlopt{$}\hlstd{Price} \hlopt{-} \hlstd{price.hat))}
\hlstd{predicted.prices} \hlkwb{<-} \hlkwd{predict}\hlstd{(model, test.set)}
\hlkwd{mean}\hlstd{(}\hlkwd{abs}\hlstd{(test.set}\hlopt{$}\hlstd{Price} \hlopt{-} \hlstd{predicted.prices))}
\end{alltt}
\begin{verbatim}
[1] 47983.22
@@ -374,7 +374,7 @@
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.137, 0.137, 0.137}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{cor}\hlstd{(test.set}\hlopt{$}\hlstd{Price, price.hat)}\hlopt{^}\hlnum{2}
\hlkwd{cor}\hlstd{(test.set}\hlopt{$}\hlstd{Price, predicted.prices)}\hlopt{^}\hlnum{2}
\end{alltt}
\begin{verbatim}
[1] 0.5162538
@@ -408,7 +408,8 @@
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.137, 0.137, 0.137}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{logmodel} \hlkwb{<-} \hlkwd{lm}\hlstd{(}\hlkwd{I}\hlstd{(}\hlkwd{log}\hlstd{(Price))} \hlopt{~} \hlstd{Living.Area} \hlopt{+} \hlstd{Land.Value,}
\hlstd{logPrice} \hlkwb{<-} \hlkwd{log}\hlstd{(training.set}\hlopt{$}\hlstd{Price)}
\hlstd{logmodel} \hlkwb{<-} \hlkwd{lm}\hlstd{(logPrice} \hlopt{~} \hlstd{Living.Area} \hlopt{+} \hlstd{Land.Value,}
\hlkwc{data}\hlstd{=training.set)}
\hlstd{predict.training} \hlkwb{<-} \hlkwd{exp}\hlstd{(}\hlkwd{predict}\hlstd{(logmodel))}
\hlstd{predict.test} \hlkwb{<-} \hlkwd{exp}\hlstd{(}\hlkwd{predict}\hlstd{(logmodel, test.set))}
