generated from sib-swiss/course_website_template
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Deployed 197f9fd with MkDocs version: 1.6.1
- Loading branch information
Unknown
committed
Jan 7, 2025
0 parents
commit 7a0359f
Showing
108 changed files
with
24,977 additions
and
0 deletions.
There are no files selected for viewing
Empty file.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,171 @@ | ||
#----------------- | ||
#----------------- | ||
# Introduction to statistics with R | ||
# 2024 | ||
# In Lausanne | ||
#----------------- | ||
#----------------- | ||
|
||
|
||
|
||
#----------------- | ||
#----------------- | ||
# 1st exercise | ||
#----------------- | ||
|
||
library(ISwR) | ||
?hellung | ||
data(hellung) | ||
|
||
summary(hellung) | ||
|
||
glucose <- hellung$glucose | ||
mean(hellung$glucose) | ||
sd(hellung$glucose) | ||
mean(hellung$conc) | ||
sd(hellung$conc) | ||
mean(hellung$diameter) | ||
sd(hellung$diameter) | ||
|
||
means.hellung <- c() | ||
for (i in 1:3){ | ||
means.hellung <- c(means.hellung, mean(hellung[,i])) | ||
print(means.hellung) | ||
} | ||
means.hellung | ||
|
||
stdev.hellung <- c() | ||
for (i in 1:3){ | ||
stdev.hellung <- c(stdev.hellung, sd(hellung[,i])) | ||
} | ||
stdev.hellung | ||
|
||
par(mfrow=c(2,2)) # for viewing multiple plots (2 rows x 2 columns = 4 plots) | ||
hist(hellung$conc, main="Hellung data", xlab="concentration", ylab="freq") | ||
hist(hellung$diameter, col="red") | ||
hist(hellung$glucose) | ||
hist(hellung$conc, breaks=20) | ||
|
||
par(mfrow=c(1,2)) # for viewing multiple plots | ||
boxplot( conc ~ glucose,data=hellung) | ||
boxplot(diameter ~ glucose, data=hellung) | ||
boxplot(hellung$conc ~ hellung$glucose) | ||
boxplot(hellung$diameter ~ hellung$glucose) | ||
|
||
|
||
cor(hellung$conc, hellung$diameter) | ||
plot(diameter ~ conc, data=hellung) | ||
cor(log(hellung$conc), hellung$diameter) | ||
plot(diameter ~ log(conc), data=hellung) | ||
|
||
# a fancy plot with ggplot :) | ||
library(ggplot2) | ||
ggplot(hellung, aes(x=log(conc), y=diameter)) + geom_point() | ||
|
||
|
||
|
||
#----------------- | ||
#----------------- | ||
# 2nd exercise | ||
#----------------- | ||
|
||
setwd("/Users/Rachel/Desktop/Introduction-to-statistics-with-R/docs/assets/exercises/") | ||
data <- read.csv("data.csv") | ||
|
||
|
||
data | ||
summary(data) | ||
sd(data[,1]); sd(data[,2]); sd(data[,3]) | ||
|
||
datatoplot <- data[,1] | ||
#pdf("datanumber1.pdf") | ||
## Plot 4 rows of graphs on one plot | ||
par(mfrow=c(4,1)) | ||
|
||
# 1st plot: individual points on the x-axis; random noise on the | ||
# y-axis so that points are not too much superimposed | ||
plot(datatoplot, runif( length(datatoplot), -1, 1), xlim=range(datatoplot)) | ||
|
||
# 2nd plot: histogram, with the density line superimposed | ||
hist(datatoplot, xlim=range(datatoplot), breaks=20) | ||
lines(density(datatoplot)) | ||
|
||
# 3rd plot: average +/- Sd | ||
plot(mean(datatoplot), 0, xlim=range(datatoplot), main="Mean and standard deviation of a") | ||
arrows(mean(datatoplot)-sd(datatoplot), 0, mean(datatoplot)+sd(datatoplot), 0, angle=90, code=3) | ||
|
||
# 4th plot: boxplot | ||
boxplot(datatoplot, horizontal=TRUE, ylim=range(datatoplot)) | ||
#dev.off() | ||
|
||
## now I am looking at the second column | ||
|
||
datatoplot <- data[,2] | ||
#pdf("datanumber2.pdf") | ||
## Plot 4 rows of graphs on one plot | ||
par(mfrow=c(4,1)) | ||
|
||
# 1st plot: individual points on the x-axis; random noise on the | ||
# y-axis so that points are not too much superimposed | ||
plot(datatoplot, runif( length(datatoplot), -1, 1), xlim=range(datatoplot)) | ||
|
||
# 2nd plot: histogram, with the density line superimposed | ||
hist(datatoplot, xlim=range(datatoplot), breaks=20) | ||
lines(density(datatoplot)) | ||
|
||
# 3rd plot: average +/- Sd | ||
plot(mean(datatoplot), 0, xlim=range(datatoplot), main="Mean and standard deviation of a") | ||
arrows(mean(datatoplot)-sd(datatoplot), 0, mean(datatoplot)+sd(datatoplot), 0, angle=90, code=3) | ||
|
||
# 4th plot: boxplot | ||
boxplot(datatoplot, horizontal=TRUE, ylim=range(datatoplot)) | ||
#dev.off() | ||
|
||
## now I am looking at the third column | ||
|
||
datatoplot <- data[,3] | ||
#pdf("datanumber3.pdf") | ||
## Plot 4 rows of graphs on one plot | ||
par(mfrow=c(4,1)) | ||
|
||
# 1st plot: individual points on the x-axis; random noise on the | ||
# y-axis so that points are not too much superimposed | ||
plot(datatoplot, runif( length(datatoplot), -1, 1), xlim=range(datatoplot)) | ||
|
||
# 2nd plot: histogram, with the density line superimposed | ||
hist(datatoplot, xlim=range(datatoplot), breaks=20) | ||
lines(density(datatoplot)) | ||
|
||
# 3rd plot: average +/- Sd | ||
plot(mean(datatoplot), 0, xlim=range(datatoplot), main="Mean and standard deviation of a") | ||
arrows(mean(datatoplot)-sd(datatoplot), 0, mean(datatoplot)+sd(datatoplot), 0, angle=90, code=3) | ||
|
||
# 4th plot: boxplot | ||
boxplot(datatoplot, horizontal=TRUE, ylim=range(datatoplot)) | ||
#dev.off() | ||
|
||
|
||
## Last exercise | ||
|
||
|
||
student <- read.table("students.csv",header=TRUE,sep=",") | ||
student[,8]<- as.numeric(gsub(",",".",student[,8])) | ||
student[,1]<- as.factor(student[,1]) | ||
student[,5]<- as.factor(student[,5]) | ||
student[,6]<- as.factor(student[,6]) | ||
student[student[,"leftrighthanded"] =="D","leftrighthanded"] <- "R" | ||
student[student[,"leftrighthanded"] =="G","leftrighthanded"] <- "L" | ||
|
||
student[student[,"smoker"] =="NS ","smoker"] <- "NS" | ||
student[student[,"smoker"] =="nS","smoker"] <- "NS" | ||
student$smoker <- factor(student$smoker,levels=unique(student$smoker)) | ||
student$leftrighthanded <- as.factor(as.character(student$leftrighthanded)) | ||
summary(student) | ||
|
||
student[student[,"height"] ==1.77,"height"] <-177 | ||
|
||
plot(student$height,student$weight,col=student$gender) | ||
boxplot(height~gender,data=student) | ||
boxplot(weight~gender,data=student) | ||
hist(student$weight) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,225 @@ | ||
#----------------- | ||
#----------------- | ||
# Introduction to statistics with R | ||
# January 2024 | ||
# In Lausanne | ||
#----------------- | ||
#----------------- | ||
|
||
library(faraway) | ||
library(ggpubr) | ||
library(ISwR) | ||
library(rstatix) | ||
library(tidyverse) | ||
|
||
|
||
|
||
#----------------- | ||
#----------------- | ||
#----------------- | ||
# 1st exercise | ||
#----------------- | ||
|
||
data(intake) | ||
?intake | ||
attach(intake) | ||
intake | ||
names(intake) | ||
summary(intake) | ||
mean(intake$pre) | ||
sd(intake$pre) | ||
|
||
# Assumption: no significant outliers in the data | ||
|
||
ggboxplot(intake$pre, width = 0.5, add = c("mean","jitter"), | ||
ylab = "premenstrual intake", xlab = F) | ||
|
||
identify_outliers(as.data.frame(intake$pre)) | ||
|
||
# Assumption: normality | ||
qqplot | ||
qqline | ||
ggqqplot(intake,"pre") | ||
|
||
shapiro_test(intake$pre) | ||
|
||
# t test | ||
|
||
t.test(pre, mu=7725) | ||
t.test(pre, mu=7725, alternative="less") | ||
|
||
|
||
|
||
#----------------- | ||
#----------------- | ||
# 2nd exercise: two samples t-test | ||
#----------------- | ||
|
||
data(energy) | ||
?energy | ||
energy | ||
|
||
# assumption 1: data in each group are normally distributed. | ||
|
||
ind.obese <- which(energy$stature == "obese") | ||
ind.lean <- which(energy$stature == "lean") | ||
|
||
shapiro_test(energy$expend[ind.obese]) | ||
shapiro_test(energy$expend[ind.lean]) | ||
|
||
# assumption 2: the variances for the two independent groups are equal. | ||
|
||
levene_test(energy, expend~stature) | ||
# if the command above does not work, use the levene test from the car package | ||
car::leveneTest(expend~stature, energy, center = "median") | ||
|
||
# t test | ||
|
||
t.test(energy$expend ~ energy$stature,var.equal=TRUE) | ||
|
||
t.test(energy$expend ~ energy$stature) | ||
|
||
t.test(energy$expend[ind.obese],energy$expend[ind.lean]) | ||
t.test(expend~stature,data=energy) | ||
|
||
ggqqplot(energy[ind.lean,], "expend") | ||
identify_outliers(as.data.frame(energy[ind.lean,"expend"])) | ||
|
||
#----------------- | ||
#----------------- | ||
# 3rd exercise: paired t-test | ||
#----------------- | ||
|
||
data(intake) | ||
?intake | ||
attach(intake) | ||
intake | ||
|
||
# assumption 1: Each of the paired measurements must be obtained from the same subject | ||
# check your sampling design ! | ||
|
||
# assumption 2: The measured differences are normally distributed. | ||
|
||
intake.diff <- intake$post - intake$pre | ||
#intake.diff.df <- as.data.frame(intake.diff) | ||
|
||
shapiro_test(intake.diff) | ||
|
||
# t test | ||
|
||
t.test(intake$pre, intake$post, paired = T) | ||
|
||
|
||
|
||
#----------------- | ||
#----------------- | ||
# 4th exercise: simulations of datasets | ||
#----------------- | ||
## change these how you want! | ||
#rnorm | ||
KO <- runif(10, min=27, max=34) | ||
WT <- runif(10, min=27, max=34) | ||
KO <- as.data.frame(KO) | ||
names(KO) <- "weight" | ||
KO$genotype <- "KO" | ||
WT <- as.data.frame(WT) | ||
names(WT) <- "weight" | ||
WT$genotype <- "WT" | ||
|
||
KO_WT <- rbind(KO,WT) | ||
|
||
boxplot(KO_WT$weight ~ KO_WT$genotype, main="Mice weight at 18 weeks", xlab="", ylab="") | ||
|
||
res.welch.test <- t.test(KO_WT$weight ~ KO_WT$genotype) | ||
res.t.test <- t.test(KO_WT$weight ~ KO_WT$genotype, var.equal = T) | ||
|
||
res.welch.test | ||
res.t.test | ||
|
||
res.welch.test$p.value | ||
res.t.test$p.value | ||
|
||
sim.p.welch.test <- NULL | ||
sim.p.t.test <- NULL | ||
sim.p.wilcox.test <- NULL | ||
|
||
for (i in 1:1000) { | ||
KO <- runif(3, min=27, max=29) | ||
WT <- runif(3, min=29, max=34) | ||
KO <- as.data.frame(KO) | ||
names(KO)[1] <- "weight" | ||
KO$genotype <- "KO" | ||
WT <- as.data.frame(WT) | ||
names(WT)[1] <- "weight" | ||
WT$genotype <- "WT" | ||
|
||
KO_WT <- rbind(KO,WT) | ||
|
||
res.welch.test <- t.test(KO_WT$weight ~ KO_WT$genotype) | ||
res.t.test <- t.test(KO_WT$weight ~ KO_WT$genotype, var.equal = T) | ||
res.wilcox.test <- wilcox.test(KO_WT$weight ~ KO_WT$genotype) | ||
sim.p.welch.test <- c(sim.p.welch.test, res.welch.test$p.value) | ||
sim.p.t.test <- c(sim.p.t.test, res.t.test$p.value) | ||
sim.p.wilcox.test <- c(sim.p.wilcox.test,res.wilcox.test$p.value) | ||
} | ||
|
||
str(sim.p.welch.test) | ||
str(sim.p.t.test) | ||
sum(sim.p.welch.test < 0.05) | ||
sum(sim.p.t.test < 0.05) | ||
plot(sim.p.t.test,sim.p.welch.test) | ||
adj.bonf <- p.adjust(sim.p.welch.test, method="bonf") | ||
sum(adj.bonf < 0.05) | ||
adj.BH <- p.adjust(sim.p.welch.test, method="BH") | ||
sum(adj.BH < 0.05) | ||
|
||
|
||
|
||
#----------------- | ||
#----------------- | ||
# 5th exercise: ANOVA | ||
#----------------- | ||
|
||
data(coagulation) | ||
|
||
# check the data | ||
summary(coagulation) | ||
|
||
get_summary_stats(group_by(coagulation,diet),coag, type = "mean_sd") | ||
|
||
|
||
coagulation %>% group_by(diet) %>% get_summary_stats(coag, type = "mean_sd") | ||
|
||
boxplot(coagulation$coag ~ coagulation$diet) | ||
|
||
ggboxplot(coagulation, x="diet",y="coag") | ||
|
||
# check normality | ||
ind.A <- which(coagulation$diet=="A") | ||
?ggqqplot | ||
ggqqplot(coagulation[ind.A,], "coag") | ||
ggqqplot(coagulation[coagulation$diet=="B",], "coag") | ||
ggqqplot(coagulation[coagulation$diet=="C",], "coag") | ||
ggqqplot(coagulation[coagulation$diet=="D",], "coag") | ||
|
||
shapiro.test(coagulation[coagulation$diet=="A","coag"]) | ||
shapiro.test(coagulation[coagulation$diet=="B","coag"]) | ||
shapiro.test(coagulation[coagulation$diet=="C","coag"]) | ||
shapiro.test(coagulation[coagulation$diet=="D","coag"]) | ||
|
||
# check variance equality | ||
levene_test(coagulation,coag~diet) | ||
# if the command above does not work, use the levene test from the car package | ||
car::leveneTest(coag~diet, coagulation, center = "median") | ||
|
||
# do anova | ||
anova_diet <- aov(coagulation$coag~coagulation$diet) | ||
|
||
summary(anova_diet) | ||
|
||
# check pairwise | ||
|
||
TukeyHSD(anova_diet) | ||
|
||
pairwise.t.test(coagulation$coag,coagulation$diet,p.adj="bonf") | ||
pairwise.t.test(coagulation$coag,coagulation$diet,p.adj="BH") |
Oops, something went wrong.