-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathch3.R
132 lines (110 loc) · 4.17 KB
/
ch3.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
library(ISwR)
# Ch 3
# Probability and distributions
#
# Most experiments are not perfectly reproducible,
# so accuracy, distributions and probability are important to know.
#
# Learn basic probability and the functions in R for random sampling and
# handling distributions.
#
# Card shuffling / dealing can be simulated with sample(); given a single
# number 40 it draws from 1:40. Sampling is without replacement by default.
print(sample(40, 5))
# With replace = TRUE the same function simulates repeated coin tosses.
coin_faces <- c("H", "T")
print(sample(coin_faces, size = 10, replace = TRUE))
# The prob argument assigns unequal probabilities to the outcomes.
trial_outcomes <- c("Success", "Failure")
print(sample(trial_outcomes, size = 10, replace = TRUE, prob = c(0.9, 0.1)))
# For two-outcome experiments the binomial distribution is the better tool.
# prod() multiplies together all elements of a vector.
print("chance of the sample(1:40, 5) being selected")
print(1 / prod(36:40))
print("chance of the sample(1:40, 5) being selected if order doesnt matter")
print(prod(1:5) / prod(36:40))
# That ratio is one over "40 choose 5" (in general "n choose k"), the number
# of ways to pick 5 items out of 40 when order is ignored:
#   choose(40, 5) = 40! / (5! * 35!) = 658008
# choose() computes the binomial coefficient directly:
print(1 / choose(40, 5))
# Discrete distributions
# The random variable takes only integer values or classes.
# A random variable X has a probability distribution described by its point
# probabilities f(x) = P(X = x) or by its cumulative distribution function
# F(x) = P(X <= x).
# Example of a discrete distribution given by its point probabilities:
# f(x) = (n choose x) * p^x * (1 - p)^(n - x)
# This is the binomial distribution; the (n choose x) are the binomial
# coefficients and p is the probability of a success outcome.
# Continuous distributions
# Any single point has probability 0 because there are infinitely many
# possible values, so a probability density function f(x) is needed, with
# F(x) = integral from -Inf to x of f(t) dt
# The uniform distribution has a constant probability density.
# Normal distribution:
# f(x) = 1 / (sqrt(2 * pi) * sigma) * exp(-(x - mu)^2 / (2 * sigma^2))
# where mu is the mean and sigma the standard deviation; it gives a bell shape.
# Four items can be calculated for a distribution:
# * density / point probability (dnorm)
# * cumulative probability / distribution function (pnorm)
# * quantiles (qnorm)
# * pseudo-random numbers (rnorm)
# Standard normal density drawn as a line over [-4, 4] in steps of 0.1.
x <- seq(from = -4, to = 4, by = 0.1)
plot(x, dnorm(x), type = "l")
# Distributions over distinct values are drawn as a pin diagram (type = "h"):
# binomial point probabilities for n = 50, p = 0.33.
x <- 0:50
plot(x, dbinom(x, size = 50, prob = 0.33), type = "h")
# Cumulative distribution functions return P(X <= x); subtracting the result
# from 1 gives the upper tail P(X > x).
# Normal with mean 132 and sd 13: probability of exceeding 160.
print(1 - pnorm(160, mean = 132, sd = 13))
# Binomial with 20 trials and p = 0.5: probability of more than 15 successes.
upper_tail <- 1 - pbinom(15, size = 20, prob = 0.5)
print(upper_tail)
# The same upper tail plus the lower tail of 4 or fewer successes.
print(upper_tail + pbinom(4, size = 20, prob = 0.5))
# Quantile functions invert the cumulative distribution function. They are
# used for confidence intervals and for power calculations in experimental
# design.
#
# For n normally distributed observations sharing one mean and standard
# deviation, the average xbar varies around the population mean mu with
# standard deviation sigma / sqrt(n).
# A 95% confidence interval for mu is therefore
#   xbar + sigma / sqrt(n) * qnorm(0.025) <= mu <=
#     xbar + sigma / sqrt(n) * qnorm(0.975)
# where 0.025 and 0.975 mark the 2.5% and 97.5% quantiles.
xbar <- 83
sigma <- 12
n <- 5
sem <- sigma / sqrt(n)
print(sem)
# Lower limit first, then upper limit.
for (tail_prob in c(0.025, 0.975)) {
  print(xbar + sem * qnorm(tail_prob))
}
# Quantiles also underlie Q-Q plots, which assess whether a data set can
# reasonably be assumed to come from a given distribution.
#
# Pseudo-random draws, ten values each: standard normal, normal with mean 7
# and sd 5, and binomial with 20 trials and p = 0.5.
print(rnorm(n = 10))
print(rnorm(n = 10, mean = 7, sd = 5))
print(rbinom(n = 10, size = 20, prob = 0.5))
# Exercise 3.1: probability of each of the following events.
# Parts a, b, d and e were marked "yes" in the original notes.
# a) A standard normally distributed variable is larger than 3.
print(1 - pnorm(3))
# b) A normally distributed variable with mean 35 and standard deviation 6
#    is larger than 42.
print(1 - pnorm(42, mean = 35, sd = 6))
# c) Getting 10 out of 10 successes in a binomial distribution with
#    probability 0.8. An earlier attempt, pbinom(9, size = 10, prob = 0.8),
#    was marked wrong; the point probability from dbinom() is the correct one.
print(dbinom(10, size = 10, prob = 0.8))
# d) X < 0.9 when X has the standard uniform distribution.
print(punif(0.9))
# e) X > 6.5 in a chi-squared distribution with 2 degrees of freedom.
print(1 - pchisq(6.5, df = 2))
# NOTE(review): the next statements are not tied to a numbered exercise.
# As written they print 1 minus the point probability of exactly 1 success
# in 100 trials with p = 0.01, then plot 1 - pbinom(1, ...) for trial counts
# 0 to 1000 -- confirm the intent.
print(1 - dbinom(1, size = 100, prob = 0.01))
x <- 0:1000
plot(x, 1 - pbinom(1, size = x, prob = 0.01), type = "h")
print("check")
# Exercise 3.2: roughly 5% of the normal distribution is said to lie outside
# 2 standard deviations. Is this true? What about 1%, 0.5% and 0.1%?
# Where are the quartiles, measured in standard deviations?
# Two-sided probability beyond 2 standard deviations.
print(2 * pnorm(-2))
# Upper limits that leave 1%, 0.5% and 0.1% in the two tails combined.
for (outside in c(0.01, 0.005, 0.001)) {
  print(qnorm(1 - outside / 2))
}
# Lower and upper quartiles.
for (quartile in c(0.25, 0.75)) {
  print(qnorm(quartile))
}
# Exercise 3.3: point probability of 0 events in 10 binomial trials with
# event probability 0.2.
print(dbinom(x = 0, size = 10, prob = 0.2))
# Exercise 3.4: simulate coin tossing with rbinom() -- ten draws, each a
# single trial with probability 0.5.
print(rbinom(n = 10, size = 1, prob = 0.5))