diff --git a/README.md b/README.md index 1f2fc05..df83b05 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ | Lecture 18: Indicator Variables and Interactions | [Slides](slides/lecture-18/lecture-18.pdf) | [Script](lecture-scripts/lecture-18.R) | | Lecture 19: Logistic Regression 1 | [Slides](slides/lecture-19/lecture-19.pdf) | [Script](lecture-scripts/lecture-19.R) | | Lecture 20: Logistic Regression 2 | [Slides](slides/lecture-20/lecture-20.pdf) | [Script](lecture-scripts/lecture-20.R) | -| Lecture 21: Problems with p-values | [Slides](slides/lecture-21/lecture-21.pdf) | | +| Lecture 21: Problems with p-values | [Slides](slides/lecture-21/lecture-21.pdf) | [Script](lecture-scripts/lecture-21.R) | | Lecture 20: Training and test sets | [Slides](slides/lecture-22/lecture-22.pdf) | | # R help pages diff --git a/lecture-scripts/lecture-21.R b/lecture-scripts/lecture-21.R new file mode 100644 index 0000000..267163a --- /dev/null +++ b/lecture-scripts/lecture-21.R @@ -0,0 +1,7 @@ +mystery <- read.csv("https://raw.githubusercontent.com/brianlukoff/sta371g/master/data/mystery.csv") + +model <- lm(Y ~ ., data=mystery) +summary(model) + +model2 <- lm(Y ~ X10 + X13 + X16, data=mystery) +summary(model2) diff --git a/slides/lecture-22/lecture-22.Rnw b/slides/lecture-22/lecture-22.Rnw index 31b664f..3b5dc64 100644 --- a/slides/lecture-22/lecture-22.Rnw +++ b/slides/lecture-22/lecture-22.Rnw @@ -327,12 +327,12 @@ @ The training set average error is: <<>>= - mean(abs(resid(model))) + mean(abs(residuals(model))) @ The test set average error comes from manually computing the prediction error for each case in the test set: <<>>= - price.hat <- predict(model, test.set) - mean(abs(test.set$Price - price.hat)) + predicted.prices <- predict(model, test.set) + mean(abs(test.set$Price - predicted.prices)) @ \end{frame} @@ -341,7 +341,7 @@ \item Similarly, we can compare the $R^2$ from the training set to the $R^2$ that we would get by predicting prices for cases in the test set. \item Recall that $R^2 = \text{cor}(Y,\hat Y)^2$; we can simulate what $R^2$ would be in the test set by calculating this in the test set: <<>>= - cor(test.set$Price, price.hat)^2 + cor(test.set$Price, predicted.prices)^2 @ \item Compare this to what $R^2$ is in the training set: @@ -361,7 +361,8 @@ \begin{frame}[fragile] \fontsm <<>>= - logmodel <- lm(I(log(Price)) ~ Living.Area + Land.Value, + logPrice <- log(training.set$Price) + logmodel <- lm(logPrice ~ Living.Area + Land.Value, data=training.set) predict.training <- exp(predict(logmodel)) predict.test <- exp(predict(logmodel, test.set)) diff --git a/slides/lecture-22/lecture-22.pdf b/slides/lecture-22/lecture-22.pdf index 574d3f1..38fa063 100644 Binary files a/slides/lecture-22/lecture-22.pdf and b/slides/lecture-22/lecture-22.pdf differ diff --git a/slides/lecture-22/lecture-22.tex b/slides/lecture-22/lecture-22.tex index ef547a3..09484de 100644 --- a/slides/lecture-22/lecture-22.tex +++ b/slides/lecture-22/lecture-22.tex @@ -346,7 +346,7 @@ \begin{knitrout} \definecolor{shadecolor}{rgb}{0.137, 0.137, 0.137}\color{fgcolor}\begin{kframe} \begin{alltt} -\hlkwd{mean}\hlstd{(}\hlkwd{abs}\hlstd{(}\hlkwd{resid}\hlstd{(model)))} +\hlkwd{mean}\hlstd{(}\hlkwd{abs}\hlstd{(}\hlkwd{residuals}\hlstd{(model)))} \end{alltt} \begin{verbatim} [1] 47685.76 @@ -357,8 +357,8 @@ \begin{knitrout} \definecolor{shadecolor}{rgb}{0.137, 0.137, 0.137}\color{fgcolor}\begin{kframe} \begin{alltt} -\hlstd{price.hat} \hlkwb{<-} \hlkwd{predict}\hlstd{(model, test.set)} -\hlkwd{mean}\hlstd{(}\hlkwd{abs}\hlstd{(test.set}\hlopt{$}\hlstd{Price} \hlopt{-} \hlstd{price.hat))} +\hlstd{predicted.prices} \hlkwb{<-} \hlkwd{predict}\hlstd{(model, test.set)} +\hlkwd{mean}\hlstd{(}\hlkwd{abs}\hlstd{(test.set}\hlopt{$}\hlstd{Price} \hlopt{-} \hlstd{predicted.prices))} \end{alltt} \begin{verbatim} [1] 47983.22 @@ -374,7 +374,7 @@ \begin{knitrout} \definecolor{shadecolor}{rgb}{0.137, 0.137, 0.137}\color{fgcolor}\begin{kframe} \begin{alltt} -\hlkwd{cor}\hlstd{(test.set}\hlopt{$}\hlstd{Price, price.hat)}\hlopt{^}\hlnum{2} +\hlkwd{cor}\hlstd{(test.set}\hlopt{$}\hlstd{Price, predicted.prices)}\hlopt{^}\hlnum{2} \end{alltt} \begin{verbatim} [1] 0.5162538 @@ -408,7 +408,8 @@ \begin{knitrout} \definecolor{shadecolor}{rgb}{0.137, 0.137, 0.137}\color{fgcolor}\begin{kframe} \begin{alltt} -\hlstd{logmodel} \hlkwb{<-} \hlkwd{lm}\hlstd{(}\hlkwd{I}\hlstd{(}\hlkwd{log}\hlstd{(Price))} \hlopt{~} \hlstd{Living.Area} \hlopt{+} \hlstd{Land.Value,} +\hlstd{logPrice} \hlkwb{<-} \hlkwd{log}\hlstd{(training.set}\hlopt{$}\hlstd{Price)} +\hlstd{logmodel} \hlkwb{<-} \hlkwd{lm}\hlstd{(logPrice} \hlopt{~} \hlstd{Living.Area} \hlopt{+} \hlstd{Land.Value,} \hlkwc{data}\hlstd{=training.set)} \hlstd{predict.training} \hlkwb{<-} \hlkwd{exp}\hlstd{(}\hlkwd{predict}\hlstd{(logmodel))} \hlstd{predict.test} \hlkwb{<-} \hlkwd{exp}\hlstd{(}\hlkwd{predict}\hlstd{(logmodel, test.set))}