generated from sib-swiss/course_website_template
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Deployed 3d9ada0 with MkDocs version: 1.5.3
- Loading branch information
Unknown
committed
Jan 22, 2024
0 parents
commit 00a5c73
Showing
100 changed files
with
24,462 additions
and
0 deletions.
There are no files selected for viewing
Empty file.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
#----------------- | ||
#----------------- | ||
# Introduction to statistics with R | ||
# 6-9 February 2023 | ||
# In Lausanne | ||
#----------------- | ||
#----------------- | ||
|
||
|
||
|
||
#----------------- | ||
#----------------- | ||
# 1st exercise | ||
#----------------- | ||
|
||
library(ISwR) | ||
?hellung | ||
data(hellung) | ||
|
||
summary(hellung) | ||
glucose <- hellung$glucose | ||
mean(hellung$glucose) | ||
sd(hellung$glucose) | ||
mean(hellung$conc) | ||
sd(hellung$conc) | ||
mean(hellung$diameter) | ||
sd(hellung$diameter) | ||
|
||
means.hellung <- c() | ||
for (i in 1:3){ | ||
means.hellung <- c(means.hellung, mean(hellung[,i])) | ||
} | ||
means.hellung | ||
|
||
stdev.hellung <- c() | ||
for (i in 1:3){ | ||
stdev.hellung <- c(stdev.hellung, sd(hellung[,i])) | ||
} | ||
stdev.hellung | ||
|
||
par(mfrow=c(2,2)) # for viewing multiple plots (2 rows x 2 columns = 4 plots) | ||
hist(hellung$conc, main="a title", xlab="concentration", ylab="freq") | ||
hist(hellung$diameter, col="red") | ||
hist(hellung$glucose) | ||
hist(hellung$conc, breaks=20) | ||
|
||
par(mfrow=c(1,2)) # for viewing multiple plots | ||
boxplot(conc ~ glucose, data=hellung) | ||
boxplot(diameter ~ glucose, data=hellung) | ||
boxplot(hellung$conc ~ hellung$glucose) | ||
boxplot(hellung$diameter ~ hellung$glucose) | ||
|
||
cor(hellung$conc, hellung$diameter) | ||
plot(diameter ~ conc, data=hellung) | ||
cor(log(hellung$conc), hellung$diameter) | ||
plot(diameter ~ log(conc), data=hellung) | ||
|
||
# a fancy plot with ggplot :) | ||
library(ggplot2) | ||
ggplot(hellung, aes(x=log(conc), y=diameter)) + geom_point() | ||
|
||
|
||
|
||
#----------------- | ||
#----------------- | ||
# 2nd exercise | ||
#----------------- | ||
|
||
setwd("/Users/Rachel/Desktop/Introduction-to-statistics-with-R/docs/assets/exercises/") | ||
data <- read.csv("data.csv") | ||
|
||
data | ||
summary(data) | ||
sd(data[,1]); sd(data[,2]); sd(data[,3]) | ||
|
||
datatoplot <- data[,1] | ||
|
||
## Plot 4 rows of graphs on one plot | ||
par(mfrow=c(4,1)) | ||
dev.off() | ||
# 1st plot: individual points on the x-axis; random noise on the | ||
# y-axis so that points are not too much superimposed | ||
plot(datatoplot, runif( length(datatoplot), -1, 1), xlim=range(datatoplot)) | ||
|
||
# 2nd plot: histogram, with the density line superimposed | ||
hist(datatoplot, xlim=range(datatoplot), breaks=20) | ||
lines(density(datatoplot)) | ||
|
||
# 3rd plot: average +/- Sd | ||
plot(mean(datatoplot), 0, xlim=range(datatoplot), main="Mean and standard deviation of a") | ||
arrows(mean(datatoplot)-sd(datatoplot), 0, mean(datatoplot)+sd(datatoplot), 0, angle=90, code=3) | ||
|
||
# 4th plot: boxplot | ||
boxplot(datatoplot, horizontal=TRUE, ylim=range(datatoplot)) | ||
|
||
|
||
student <- read.table("students.csv",header=TRUE,sep=",") | ||
student[,8]<- as.numeric(gsub(",",".",student[,8])) | ||
student[,1]<- as.factor(student[,1]) | ||
student[,5]<- as.factor(student[,5]) | ||
student[,6]<- as.factor(student[,6]) | ||
student[student[,"leftrighthanded"] =="D","leftrighthanded"] <- "R" | ||
student[student[,"leftrighthanded"] =="G","leftrighthanded"] <- "L" | ||
|
||
student[student[,"smoker"] =="NS ","smoker"] <- "NS" | ||
student[student[,"smoker"] =="nS","smoker"] <- "NS" | ||
student$smoker <- factor(student$smoker,levels=unique(student$smoker)) | ||
student$leftrighthanded <- as.factor(as.character(student$leftrighthanded)) | ||
summary(student) | ||
|
||
student[student[,"height"] ==1.77,"height"] <-177 | ||
plot(student$height,student$weight,col=student$gender) | ||
boxplot(height~gender,data=student) | ||
boxplot(weight~gender,data=student) | ||
hist(student$weight) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,213 @@ | ||
#----------------- | ||
#----------------- | ||
# Introduction to statistics with R | ||
# 6 - 9 February 2023 | ||
# In Lausanne | ||
#----------------- | ||
#----------------- | ||
|
||
library(faraway) | ||
library(ggpubr) | ||
library(ISwR) | ||
library(rstatix) | ||
library(tidyverse) | ||
|
||
|
||
|
||
#----------------- | ||
#----------------- | ||
#----------------- | ||
# 1st exercise | ||
#----------------- | ||
|
||
data(intake) | ||
?intake | ||
attach(intake) | ||
intake | ||
names(intake) | ||
summary(intake) | ||
mean(intake$pre) | ||
sd(intake$pre) | ||
|
||
# Assumption: no significant outliers in the data | ||
|
||
ggboxplot(intake$pre, width = 0.5, add = c("mean","jitter"), | ||
ylab = "premenstrual intake", xlab = F) | ||
|
||
identify_outliers(as.data.frame(intake$pre)) | ||
|
||
# Assumption: normality | ||
qqplot | ||
qqline | ||
ggqqplot(intake,"pre") | ||
|
||
shapiro_test(intake$pre) | ||
|
||
# t test | ||
|
||
t.test(pre, mu=7725) | ||
t.test(pre, mu=7725, alternative="less") | ||
|
||
|
||
|
||
#----------------- | ||
#----------------- | ||
# 2nd exercise: two samples t-test | ||
#----------------- | ||
|
||
data(energy) | ||
?energy | ||
energy | ||
|
||
# assumption 1: data in each group are normally distributed. | ||
|
||
ind.obese <- which(energy$stature == "obese") | ||
ind.lean <- which(energy$stature == "lean") | ||
|
||
shapiro_test(energy$expend[ind.obese]) | ||
shapiro_test(energy$expend[ind.lean]) | ||
|
||
# assumption 2: the variances for the two independent groups are equal. | ||
|
||
levene_test(energy, expend~stature) | ||
# if the command above does not work, use the levene test from the car package | ||
car::leveneTest(expend~stature, energy, center = "median") | ||
|
||
# t test | ||
|
||
t.test(energy$expend ~ energy$stature,var.equal=TRUE) | ||
|
||
|
||
|
||
#----------------- | ||
#----------------- | ||
# 3rd exercise: paired t-test | ||
#----------------- | ||
|
||
data(intake) | ||
?intake | ||
attach(intake) | ||
intake | ||
|
||
# assumption 1: Each of the paired measurements must be obtained from the same subject | ||
# check your sampling design ! | ||
|
||
# assumption 2: The measured differences are normally distributed. | ||
|
||
intake.diff <- intake$post - intake$pre | ||
#intake.diff.df <- as.data.frame(intake.diff) | ||
|
||
shapiro_test(intake.diff) | ||
|
||
# t test | ||
|
||
t.test(intake$pre, intake$post, paired = T) | ||
|
||
|
||
|
||
#----------------- | ||
#----------------- | ||
# 4th exercise: simulations of datasets | ||
#----------------- | ||
## change these how you want! | ||
KO <- rnorm(1000, mean=0,sd=1) | ||
WT <- rnorm(10, mean=4, sd=1) | ||
KO <- as.data.frame(KO) | ||
names(KO) <- "weight" | ||
KO$genotype <- "KO" | ||
WT <- as.data.frame(WT) | ||
names(WT) <- "weight" | ||
WT$genotype <- "WT" | ||
|
||
KO_WT <- rbind(KO,WT) | ||
|
||
boxplot(KO_WT$weight ~ KO_WT$genotype, main="Mice weight at 18 weeks", xlab="", ylab="") | ||
|
||
res.welch.test <- t.test(KO_WT$weight ~ KO_WT$genotype) | ||
res.t.test <- t.test(KO_WT$weight ~ KO_WT$genotype, var.equal = T) | ||
|
||
res.welch.test | ||
res.t.test | ||
|
||
res.welch.test$p.value | ||
res.t.test$p.value | ||
|
||
sim.p.welch.test <- NULL | ||
sim.p.t.test <- NULL | ||
|
||
for (i in 1:1000) { | ||
KO <- runif(10, min=27, max=34) | ||
WT <- runif(10, min=27, max=34) | ||
KO <- as.data.frame(KO) | ||
names(KO)[1] <- "weight" | ||
KO$genotype <- "KO" | ||
WT <- as.data.frame(WT) | ||
names(WT)[1] <- "weight" | ||
WT$genotype <- "WT" | ||
|
||
KO_WT <- rbind(KO,WT) | ||
|
||
res.welch.test <- t.test(KO_WT$weight ~ KO_WT$genotype) | ||
res.t.test <- t.test(KO_WT$weight ~ KO_WT$genotype, var.equal = T) | ||
|
||
sim.p.welch.test <- c(sim.p.welch.test, res.welch.test$p.value) | ||
sim.p.t.test <- c(sim.p.t.test, res.t.test$p.value) | ||
} | ||
|
||
str(sim.p.welch.test) | ||
str(sim.p.t.test) | ||
sum(sim.p.welch.test < 0.05) | ||
sum(sim.p.t.test < 0.05) | ||
plot(sim.p.t.test,sim.p.welch.test) | ||
adj.bonf <- p.adjust(sim.p.welch.test, method="bonf") | ||
sum(adj.bonf < 0.05) | ||
adj.BH <- p.adjust(sim.p.welch.test, method="BH") | ||
sum(adj.BH < 0.05) | ||
|
||
|
||
|
||
#----------------- | ||
#----------------- | ||
# 5th exercise: ANOVA | ||
#----------------- | ||
|
||
data(coagulation) | ||
|
||
# check the data | ||
summary(coagulation) | ||
get_summary_stats(group_by(coagulation,diet),coag, type = "mean_sd") | ||
coagulation %>% group_by(diet) %>% get_summary_stats(coag, type = "mean_sd") | ||
|
||
boxplot(coagulation$coag ~ coagulation$diet) | ||
|
||
ggboxplot(coagulation, x="diet",y="coag") | ||
|
||
# check normality | ||
|
||
?ggqqplot | ||
ggqqplot(coagulation[coagulation$diet=="A",], "coag") | ||
ggqqplot(coagulation[coagulation$diet=="B",], "coag") | ||
ggqqplot(coagulation[coagulation$diet=="C",], "coag") | ||
ggqqplot(coagulation[coagulation$diet=="D",], "coag") | ||
|
||
shapiro.test(coagulation[coagulation$diet=="A","coag"]) | ||
shapiro.test(coagulation[coagulation$diet=="B","coag"]) | ||
shapiro.test(coagulation[coagulation$diet=="C","coag"]) | ||
shapiro.test(coagulation[coagulation$diet=="D","coag"]) | ||
|
||
# check variance equality | ||
levene_test(coagulation,coag~diet) | ||
# if the command above does not work, use the levene test from the car package | ||
car::leveneTest(coag~diet, coagulation, center = "median") | ||
|
||
# do anova | ||
anova_diet <- aov(coagulation$coag~coagulation$diet) | ||
|
||
summary(anova_diet) | ||
|
||
# check pairwise | ||
|
||
TukeyHSD(anova_diet) | ||
|
||
pairwise.t.test(coagulation$coag,coagulation$diet,p.adj="bonf") | ||
pairwise.t.test(coagulation$coag,coagulation$diet,p.adj="BH") |
Oops, something went wrong.