-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathch6.R
101 lines (75 loc) · 2.91 KB
/
ch6.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
library(ISwR)
# Goal of this chapter: show how to perform
# * basic regression analysis
# * plots for model checking, and display of confidence and prediction intervals
# * correlation analysis, both parametric and non-parametric
# 6.1 Simple linear regression
# Regress short.velocity on blood.glucose using the thuesen data.
# The data frame is passed explicitly (data = / with()) rather than attach()ed:
# attaching thuesen and then thuesen[cc, ] stacks two copies on the search
# path, masks the first one, and leaves both attached after the script ends.
print(lm(short.velocity ~ blood.glucose, data = thuesen))
print(summary(lm(short.velocity ~ blood.glucose, data = thuesen)))
# Scatterplot of the raw data with the fitted regression line on top.
with(thuesen, plot(blood.glucose, short.velocity))
abline(lm(short.velocity ~ blood.glucose, data = thuesen))

# Restrict to rows without missing values so that fitted values and residuals
# line up one-to-one with the observations plotted below.
cc <- complete.cases(thuesen)
thuesen.cc <- thuesen[cc, ]
lm.velo <- lm(short.velocity ~ blood.glucose, data = thuesen.cc)
print(fitted(lm.velo))
with(thuesen.cc, {
  plot(blood.glucose, short.velocity)
  # fitted line drawn through the fitted values
  lines(blood.glucose, fitted(lm.velo))
  # vertical segments from each fitted value to its observation (the residuals)
  segments(blood.glucose, fitted(lm.velo), blood.glucose, short.velocity)
})
# Model checking: residuals against fitted values, and a normal Q-Q plot of
# the residuals.
plot(fitted(lm.velo), resid(lm.velo))
qqnorm(resid(lm.velo))

# 6.3 Prediction and confidence bands
# Confidence bands reflect uncertainty about the line itself; need to
# establish that ybar and bhat are uncorrelated.
# Prediction bands additionally include the uncertainty of future observations.
# Their limits approach the true line +/- 2 standard deviations as the number
# of observations increases; with few observations the bands are curved.
# Both rely on normally distributed errors with constant variance, so check
# that this is a reasonable approximation before using them.
print(predict(lm.velo))
# confidence limits for the fitted line
print(predict(lm.velo, interval = "confidence"))
# prediction limits for new observations
print(predict(lm.velo, interval = "prediction"))
# Evaluate both bands on a regular grid of glucose values and add them to the
# scatterplot with matlines().
pred.frame <- data.frame(blood.glucose = 4:20)
pp <- predict(lm.velo, interval = "prediction", newdata = pred.frame)
pc <- predict(lm.velo, interval = "confidence", newdata = pred.frame)
# ylim is widened so that the prediction band fits inside the plot region.
with(
  thuesen.cc,
  plot(blood.glucose, short.velocity,
       ylim = range(short.velocity, pp, na.rm = TRUE))
)
pred.gluc <- pred.frame$blood.glucose
# Columns of pc/pp are fit, lwr, upr: solid fitted line, then one common line
# type for both limits of a band (dashed = confidence, dotted = prediction).
matlines(pred.gluc, pc, lty = c(1, 2, 2), col = "black")
matlines(pred.gluc, pp, lty = c(1, 3, 3), col = "black")

# 6.4 Correlation
# A correlation coefficient is a scale-invariant measure of association
# between two variables.
# Pearson correlation (r):
# rooted in the two-dimensional normal distribution; under a theoretical
# correlation of zero, r can be transformed to a t-distributed variable to
# test its significance.
print(with(thuesen.cc,
           cor(blood.glucose, short.velocity, use = "complete.obs")))
print(with(thuesen.cc, cor.test(blood.glucose, short.velocity)))
# Spearman's rho: nonparametric variant
print(with(thuesen.cc,
           cor.test(blood.glucose, short.velocity, method = "spearman")))
# Kendall's tau: based on counting concordant and discordant pairs
print(with(thuesen.cc,
           cor.test(blood.glucose, short.velocity, method = "kendall")))
# 6.5.1
# Regress metabolic rate on body weight (rmr data), then report the model
# summary, the predicted metabolic rate at a body weight of 70, and the
# default confidence intervals for the coefficients.
print(rmr)
fit <- lm(metabolic.rate ~ body.weight, data = rmr)
print(summary(fit))
print(
  predict(fit, newdata = data.frame(body.weight = 70))
)
print(confint(fit))
# 6.2 (presumably the second exercise, following 6.5.1 above)
# Regress sqrt(igf1) on age in the juul data, using only rows with age > 25.
local({
  # the fit is only needed for its summary, so keep it out of the workspace
  older.fit <- lm(sqrt(igf1) ~ age, data = juul, subset = age > 25)
  print(summary(older.fit))
})
# 6.3 (presumably the third exercise)
# Show the malaria data, regress log(ab) on age, and plot the same relation.
print(malaria)
local({
  # temporary fit; only its summary is printed
  log.ab.fit <- lm(log(ab) ~ age, data = malaria)
  print(summary(log.ab.fit))
})
plot(log(ab) ~ age, data = malaria)
# 6.4 (presumably the fourth exercise)
# Simulate n pairs (x, y): x is standard normal and y is normal with mean
# rho * x and standard deviation sqrt(1 - rho^2). Then plot the pairs and run
# the Pearson, Spearman and Kendall correlation tests on them.
# NOTE(review): no seed is set, so every run draws a different sample.
rho <- 0.9
n <- 100
x <- rnorm(n)
y <- rnorm(n, mean = rho * x, sd = sqrt(1 - rho^2))
plot(x, y)
invisible(lapply(
  c("pearson", "spearman", "kendall"),
  function(method.name) print(cor.test(x, y, method = method.name))
))