Skip to content

Commit

Permalink
Deployed 197f9fd with MkDocs version: 1.6.1
Browse files Browse the repository at this point in the history
  • Loading branch information
Unknown committed Jan 7, 2025
0 parents commit 7a0359f
Show file tree
Hide file tree
Showing 108 changed files with 24,977 additions and 0 deletions.
Empty file added .nojekyll
Empty file.
452 changes: 452 additions & 0 deletions 404.html

Large diffs are not rendered by default.

171 changes: 171 additions & 0 deletions assets/Scripts/Day1.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
#-----------------
#-----------------
# Introduction to statistics with R
# 2024
# In Lausanne
#-----------------
#-----------------



#-----------------
#-----------------
# 1st exercise
#-----------------

library(ISwR)
?hellung
data(hellung)

summary(hellung)

glucose <- hellung$glucose
mean(hellung$glucose)
sd(hellung$glucose)
mean(hellung$conc)
sd(hellung$conc)
mean(hellung$diameter)
sd(hellung$diameter)

means.hellung <- c()
for (i in 1:3){
means.hellung <- c(means.hellung, mean(hellung[,i]))
print(means.hellung)
}
means.hellung

stdev.hellung <- c()
for (i in 1:3){
stdev.hellung <- c(stdev.hellung, sd(hellung[,i]))
}
stdev.hellung

par(mfrow=c(2,2)) # for viewing multiple plots (2 rows x 2 columns = 4 plots)
hist(hellung$conc, main="Hellung data", xlab="concentration", ylab="freq")
hist(hellung$diameter, col="red")
hist(hellung$glucose)
hist(hellung$conc, breaks=20)

par(mfrow=c(1,2)) # for viewing multiple plots
boxplot( conc ~ glucose,data=hellung)
boxplot(diameter ~ glucose, data=hellung)
boxplot(hellung$conc ~ hellung$glucose)
boxplot(hellung$diameter ~ hellung$glucose)


cor(hellung$conc, hellung$diameter)
plot(diameter ~ conc, data=hellung)
cor(log(hellung$conc), hellung$diameter)
plot(diameter ~ log(conc), data=hellung)

# a fancy plot with ggplot :)
library(ggplot2)
ggplot(hellung, aes(x=log(conc), y=diameter)) + geom_point()



#-----------------
#-----------------
# 2nd exercise
#-----------------

setwd("/Users/Rachel/Desktop/Introduction-to-statistics-with-R/docs/assets/exercises/")
data <- read.csv("data.csv")


data
summary(data)
sd(data[,1]); sd(data[,2]); sd(data[,3])

datatoplot <- data[,1]
#pdf("datanumber1.pdf")
## Plot 4 rows of graphs on one plot
par(mfrow=c(4,1))

# 1st plot: individual points on the x-axis; random noise on the
# y-axis so that points are not too much superimposed
plot(datatoplot, runif( length(datatoplot), -1, 1), xlim=range(datatoplot))

# 2nd plot: histogram, with the density line superimposed
hist(datatoplot, xlim=range(datatoplot), breaks=20)
lines(density(datatoplot))

# 3rd plot: average +/- Sd
plot(mean(datatoplot), 0, xlim=range(datatoplot), main="Mean and standard deviation of a")
arrows(mean(datatoplot)-sd(datatoplot), 0, mean(datatoplot)+sd(datatoplot), 0, angle=90, code=3)

# 4th plot: boxplot
boxplot(datatoplot, horizontal=TRUE, ylim=range(datatoplot))
#dev.off()

## now I am looking at the second column

datatoplot <- data[,2]
#pdf("datanumber2.pdf")
## Plot 4 rows of graphs on one plot
par(mfrow=c(4,1))

# 1st plot: individual points on the x-axis; random noise on the
# y-axis so that points are not too much superimposed
plot(datatoplot, runif( length(datatoplot), -1, 1), xlim=range(datatoplot))

# 2nd plot: histogram, with the density line superimposed
hist(datatoplot, xlim=range(datatoplot), breaks=20)
lines(density(datatoplot))

# 3rd plot: average +/- Sd
plot(mean(datatoplot), 0, xlim=range(datatoplot), main="Mean and standard deviation of a")
arrows(mean(datatoplot)-sd(datatoplot), 0, mean(datatoplot)+sd(datatoplot), 0, angle=90, code=3)

# 4th plot: boxplot
boxplot(datatoplot, horizontal=TRUE, ylim=range(datatoplot))
#dev.off()

## now I am looking at the third column

datatoplot <- data[,3]
#pdf("datanumber3.pdf")
## Plot 4 rows of graphs on one plot
par(mfrow=c(4,1))

# 1st plot: individual points on the x-axis; random noise on the
# y-axis so that points are not too much superimposed
plot(datatoplot, runif( length(datatoplot), -1, 1), xlim=range(datatoplot))

# 2nd plot: histogram, with the density line superimposed
hist(datatoplot, xlim=range(datatoplot), breaks=20)
lines(density(datatoplot))

# 3rd plot: average +/- Sd
plot(mean(datatoplot), 0, xlim=range(datatoplot), main="Mean and standard deviation of a")
arrows(mean(datatoplot)-sd(datatoplot), 0, mean(datatoplot)+sd(datatoplot), 0, angle=90, code=3)

# 4th plot: boxplot
boxplot(datatoplot, horizontal=TRUE, ylim=range(datatoplot))
#dev.off()


## Last exercise


student <- read.table("students.csv",header=TRUE,sep=",")
student[,8]<- as.numeric(gsub(",",".",student[,8]))
student[,1]<- as.factor(student[,1])
student[,5]<- as.factor(student[,5])
student[,6]<- as.factor(student[,6])
student[student[,"leftrighthanded"] =="D","leftrighthanded"] <- "R"
student[student[,"leftrighthanded"] =="G","leftrighthanded"] <- "L"

student[student[,"smoker"] =="NS ","smoker"] <- "NS"
student[student[,"smoker"] =="nS","smoker"] <- "NS"
student$smoker <- factor(student$smoker,levels=unique(student$smoker))
student$leftrighthanded <- as.factor(as.character(student$leftrighthanded))
summary(student)

student[student[,"height"] ==1.77,"height"] <-177

plot(student$height,student$weight,col=student$gender)
boxplot(height~gender,data=student)
boxplot(weight~gender,data=student)
hist(student$weight)

225 changes: 225 additions & 0 deletions assets/Scripts/Day2.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
#-----------------
#-----------------
# Introduction to statistics with R
# January 2024
# In Lausanne
#-----------------
#-----------------

library(faraway)
library(ggpubr)
library(ISwR)
library(rstatix)
library(tidyverse)



#-----------------
#-----------------
#-----------------
# 1st exercise
#-----------------

data(intake)
?intake
attach(intake)
intake
names(intake)
summary(intake)
mean(intake$pre)
sd(intake$pre)

# Assumption: no significant outliers in the data

ggboxplot(intake$pre, width = 0.5, add = c("mean","jitter"),
ylab = "premenstrual intake", xlab = F)

identify_outliers(as.data.frame(intake$pre))

# Assumption: normality
qqplot
qqline
ggqqplot(intake,"pre")

shapiro_test(intake$pre)

# t test

t.test(pre, mu=7725)
t.test(pre, mu=7725, alternative="less")



#-----------------
#-----------------
# 2nd exercise: two samples t-test
#-----------------

data(energy)
?energy
energy

# assumption 1: data in each group are normally distributed.

ind.obese <- which(energy$stature == "obese")
ind.lean <- which(energy$stature == "lean")

shapiro_test(energy$expend[ind.obese])
shapiro_test(energy$expend[ind.lean])

# assumption 2: the variances for the two independent groups are equal.

levene_test(energy, expend~stature)
# if the command above does not work, use the levene test from the car package
car::leveneTest(expend~stature, energy, center = "median")

# t test

t.test(energy$expend ~ energy$stature,var.equal=TRUE)

t.test(energy$expend ~ energy$stature)

t.test(energy$expend[ind.obese],energy$expend[ind.lean])
t.test(expend~stature,data=energy)

ggqqplot(energy[ind.lean,], "expend")
identify_outliers(as.data.frame(energy[ind.lean,"expend"]))

#-----------------
#-----------------
# 3rd exercise: paired t-test
#-----------------

data(intake)
?intake
attach(intake)
intake

# assumption 1: Each of the paired measurements must be obtained from the same subject
# check your sampling design !

# assumption 2: The measured differences are normally distributed.

intake.diff <- intake$post - intake$pre
#intake.diff.df <- as.data.frame(intake.diff)

shapiro_test(intake.diff)

# t test

t.test(intake$pre, intake$post, paired = T)



#-----------------
#-----------------
# 4th exercise: simulations of datasets
#-----------------
## change these how you want!
#rnorm
KO <- runif(10, min=27, max=34)
WT <- runif(10, min=27, max=34)
KO <- as.data.frame(KO)
names(KO) <- "weight"
KO$genotype <- "KO"
WT <- as.data.frame(WT)
names(WT) <- "weight"
WT$genotype <- "WT"

KO_WT <- rbind(KO,WT)

boxplot(KO_WT$weight ~ KO_WT$genotype, main="Mice weight at 18 weeks", xlab="", ylab="")

res.welch.test <- t.test(KO_WT$weight ~ KO_WT$genotype)
res.t.test <- t.test(KO_WT$weight ~ KO_WT$genotype, var.equal = T)

res.welch.test
res.t.test

res.welch.test$p.value
res.t.test$p.value

sim.p.welch.test <- NULL
sim.p.t.test <- NULL
sim.p.wilcox.test <- NULL

for (i in 1:1000) {
KO <- runif(3, min=27, max=29)
WT <- runif(3, min=29, max=34)
KO <- as.data.frame(KO)
names(KO)[1] <- "weight"
KO$genotype <- "KO"
WT <- as.data.frame(WT)
names(WT)[1] <- "weight"
WT$genotype <- "WT"

KO_WT <- rbind(KO,WT)

res.welch.test <- t.test(KO_WT$weight ~ KO_WT$genotype)
res.t.test <- t.test(KO_WT$weight ~ KO_WT$genotype, var.equal = T)
res.wilcox.test <- wilcox.test(KO_WT$weight ~ KO_WT$genotype)
sim.p.welch.test <- c(sim.p.welch.test, res.welch.test$p.value)
sim.p.t.test <- c(sim.p.t.test, res.t.test$p.value)
sim.p.wilcox.test <- c(sim.p.wilcox.test,res.wilcox.test$p.value)
}

str(sim.p.welch.test)
str(sim.p.t.test)
sum(sim.p.welch.test < 0.05)
sum(sim.p.t.test < 0.05)
plot(sim.p.t.test,sim.p.welch.test)
adj.bonf <- p.adjust(sim.p.welch.test, method="bonf")
sum(adj.bonf < 0.05)
adj.BH <- p.adjust(sim.p.welch.test, method="BH")
sum(adj.BH < 0.05)



#-----------------
#-----------------
# 5th exercise: ANOVA
#-----------------

data(coagulation)

# check the data
summary(coagulation)

get_summary_stats(group_by(coagulation,diet),coag, type = "mean_sd")


coagulation %>% group_by(diet) %>% get_summary_stats(coag, type = "mean_sd")

boxplot(coagulation$coag ~ coagulation$diet)

ggboxplot(coagulation, x="diet",y="coag")

# check normality
ind.A <- which(coagulation$diet=="A")
?ggqqplot
ggqqplot(coagulation[ind.A,], "coag")
ggqqplot(coagulation[coagulation$diet=="B",], "coag")
ggqqplot(coagulation[coagulation$diet=="C",], "coag")
ggqqplot(coagulation[coagulation$diet=="D",], "coag")

shapiro.test(coagulation[coagulation$diet=="A","coag"])
shapiro.test(coagulation[coagulation$diet=="B","coag"])
shapiro.test(coagulation[coagulation$diet=="C","coag"])
shapiro.test(coagulation[coagulation$diet=="D","coag"])

# check variance equality
levene_test(coagulation,coag~diet)
# if the command above does not work, use the levene test from the car package
car::leveneTest(coag~diet, coagulation, center = "median")

# do anova
anova_diet <- aov(coagulation$coag~coagulation$diet)

summary(anova_diet)

# check pairwise

TukeyHSD(anova_diet)

pairwise.t.test(coagulation$coag,coagulation$diet,p.adj="bonf")
pairwise.t.test(coagulation$coag,coagulation$diet,p.adj="BH")
Loading

0 comments on commit 7a0359f

Please sign in to comment.