Skip to content

Commit

Permalink
Deployed 3d9ada0 with MkDocs version: 1.5.3
Browse files Browse the repository at this point in the history
  • Loading branch information
Unknown committed Jan 22, 2024
0 parents commit 00a5c73
Show file tree
Hide file tree
Showing 100 changed files with 24,462 additions and 0 deletions.
Empty file added .nojekyll
Empty file.
452 changes: 452 additions & 0 deletions 404.html

Large diffs are not rendered by default.

116 changes: 116 additions & 0 deletions assets/Scripts/Day1.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
#-----------------
#-----------------
# Introduction to statistics with R
# 6-9 February 2023
# In Lausanne
#-----------------
#-----------------



#-----------------
#-----------------
# 1st exercise
#-----------------

library(ISwR)
?hellung
data(hellung)

summary(hellung)
glucose <- hellung$glucose
mean(hellung$glucose)
sd(hellung$glucose)
mean(hellung$conc)
sd(hellung$conc)
mean(hellung$diameter)
sd(hellung$diameter)

means.hellung <- c()
for (i in 1:3){
means.hellung <- c(means.hellung, mean(hellung[,i]))
}
means.hellung

stdev.hellung <- c()
for (i in 1:3){
stdev.hellung <- c(stdev.hellung, sd(hellung[,i]))
}
stdev.hellung

par(mfrow=c(2,2)) # for viewing multiple plots (2 rows x 2 columns = 4 plots)
hist(hellung$conc, main="a title", xlab="concentration", ylab="freq")
hist(hellung$diameter, col="red")
hist(hellung$glucose)
hist(hellung$conc, breaks=20)

par(mfrow=c(1,2)) # for viewing multiple plots
boxplot(conc ~ glucose, data=hellung)
boxplot(diameter ~ glucose, data=hellung)
boxplot(hellung$conc ~ hellung$glucose)
boxplot(hellung$diameter ~ hellung$glucose)

cor(hellung$conc, hellung$diameter)
plot(diameter ~ conc, data=hellung)
cor(log(hellung$conc), hellung$diameter)
plot(diameter ~ log(conc), data=hellung)

# a fancy plot with ggplot :)
library(ggplot2)
ggplot(hellung, aes(x=log(conc), y=diameter)) + geom_point()



#-----------------
#-----------------
# 2nd exercise
#-----------------

setwd("/Users/Rachel/Desktop/Introduction-to-statistics-with-R/docs/assets/exercises/")
data <- read.csv("data.csv")

data
summary(data)
sd(data[,1]); sd(data[,2]); sd(data[,3])

datatoplot <- data[,1]

## Plot 4 rows of graphs on one plot
par(mfrow=c(4,1))
dev.off()
# 1st plot: individual points on the x-axis; random noise on the
# y-axis so that points are not too much superimposed
plot(datatoplot, runif( length(datatoplot), -1, 1), xlim=range(datatoplot))

# 2nd plot: histogram, with the density line superimposed
hist(datatoplot, xlim=range(datatoplot), breaks=20)
lines(density(datatoplot))

# 3rd plot: average +/- Sd
plot(mean(datatoplot), 0, xlim=range(datatoplot), main="Mean and standard deviation of a")
arrows(mean(datatoplot)-sd(datatoplot), 0, mean(datatoplot)+sd(datatoplot), 0, angle=90, code=3)

# 4th plot: boxplot
boxplot(datatoplot, horizontal=TRUE, ylim=range(datatoplot))


student <- read.table("students.csv",header=TRUE,sep=",")
student[,8]<- as.numeric(gsub(",",".",student[,8]))
student[,1]<- as.factor(student[,1])
student[,5]<- as.factor(student[,5])
student[,6]<- as.factor(student[,6])
student[student[,"leftrighthanded"] =="D","leftrighthanded"] <- "R"
student[student[,"leftrighthanded"] =="G","leftrighthanded"] <- "L"

student[student[,"smoker"] =="NS ","smoker"] <- "NS"
student[student[,"smoker"] =="nS","smoker"] <- "NS"
student$smoker <- factor(student$smoker,levels=unique(student$smoker))
student$leftrighthanded <- as.factor(as.character(student$leftrighthanded))
summary(student)

student[student[,"height"] ==1.77,"height"] <-177
plot(student$height,student$weight,col=student$gender)
boxplot(height~gender,data=student)
boxplot(weight~gender,data=student)
hist(student$weight)

213 changes: 213 additions & 0 deletions assets/Scripts/Day2.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
#-----------------
#-----------------
# Introduction to statistics with R
# 6 - 9 February 2023
# In Lausanne
#-----------------
#-----------------

library(faraway)
library(ggpubr)
library(ISwR)
library(rstatix)
library(tidyverse)



#-----------------
#-----------------
#-----------------
# 1st exercise
#-----------------

data(intake)
?intake
attach(intake)
intake
names(intake)
summary(intake)
mean(intake$pre)
sd(intake$pre)

# Assumption: no significant outliers in the data

ggboxplot(intake$pre, width = 0.5, add = c("mean","jitter"),
ylab = "premenstrual intake", xlab = F)

identify_outliers(as.data.frame(intake$pre))

# Assumption: normality
qqplot
qqline
ggqqplot(intake,"pre")

shapiro_test(intake$pre)

# t test

t.test(pre, mu=7725)
t.test(pre, mu=7725, alternative="less")



#-----------------
#-----------------
# 2nd exercise: two samples t-test
#-----------------

data(energy)
?energy
energy

# assumption 1: data in each group are normally distributed.

ind.obese <- which(energy$stature == "obese")
ind.lean <- which(energy$stature == "lean")

shapiro_test(energy$expend[ind.obese])
shapiro_test(energy$expend[ind.lean])

# assumption 2: the variances for the two independent groups are equal.

levene_test(energy, expend~stature)
# if the command above does not work, use the levene test from the car package
car::leveneTest(expend~stature, energy, center = "median")

# t test

t.test(energy$expend ~ energy$stature,var.equal=TRUE)



#-----------------
#-----------------
# 3rd exercise: paired t-test
#-----------------

data(intake)
?intake
attach(intake)
intake

# assumption 1: Each of the paired measurements must be obtained from the same subject
# check your sampling design !

# assumption 2: The measured differences are normally distributed.

intake.diff <- intake$post - intake$pre
#intake.diff.df <- as.data.frame(intake.diff)

shapiro_test(intake.diff)

# t test

t.test(intake$pre, intake$post, paired = T)



#-----------------
#-----------------
# 4th exercise: simulations of datasets
#-----------------
## change these how you want!
KO <- rnorm(1000, mean=0,sd=1)
WT <- rnorm(10, mean=4, sd=1)
KO <- as.data.frame(KO)
names(KO) <- "weight"
KO$genotype <- "KO"
WT <- as.data.frame(WT)
names(WT) <- "weight"
WT$genotype <- "WT"

KO_WT <- rbind(KO,WT)

boxplot(KO_WT$weight ~ KO_WT$genotype, main="Mice weight at 18 weeks", xlab="", ylab="")

res.welch.test <- t.test(KO_WT$weight ~ KO_WT$genotype)
res.t.test <- t.test(KO_WT$weight ~ KO_WT$genotype, var.equal = T)

res.welch.test
res.t.test

res.welch.test$p.value
res.t.test$p.value

sim.p.welch.test <- NULL
sim.p.t.test <- NULL

for (i in 1:1000) {
KO <- runif(10, min=27, max=34)
WT <- runif(10, min=27, max=34)
KO <- as.data.frame(KO)
names(KO)[1] <- "weight"
KO$genotype <- "KO"
WT <- as.data.frame(WT)
names(WT)[1] <- "weight"
WT$genotype <- "WT"

KO_WT <- rbind(KO,WT)

res.welch.test <- t.test(KO_WT$weight ~ KO_WT$genotype)
res.t.test <- t.test(KO_WT$weight ~ KO_WT$genotype, var.equal = T)

sim.p.welch.test <- c(sim.p.welch.test, res.welch.test$p.value)
sim.p.t.test <- c(sim.p.t.test, res.t.test$p.value)
}

str(sim.p.welch.test)
str(sim.p.t.test)
sum(sim.p.welch.test < 0.05)
sum(sim.p.t.test < 0.05)
plot(sim.p.t.test,sim.p.welch.test)
adj.bonf <- p.adjust(sim.p.welch.test, method="bonf")
sum(adj.bonf < 0.05)
adj.BH <- p.adjust(sim.p.welch.test, method="BH")
sum(adj.BH < 0.05)



#-----------------
#-----------------
# 5th exercise: ANOVA
#-----------------

data(coagulation)

# check the data
summary(coagulation)
get_summary_stats(group_by(coagulation,diet),coag, type = "mean_sd")
coagulation %>% group_by(diet) %>% get_summary_stats(coag, type = "mean_sd")

boxplot(coagulation$coag ~ coagulation$diet)

ggboxplot(coagulation, x="diet",y="coag")

# check normality

?ggqqplot
ggqqplot(coagulation[coagulation$diet=="A",], "coag")
ggqqplot(coagulation[coagulation$diet=="B",], "coag")
ggqqplot(coagulation[coagulation$diet=="C",], "coag")
ggqqplot(coagulation[coagulation$diet=="D",], "coag")

shapiro.test(coagulation[coagulation$diet=="A","coag"])
shapiro.test(coagulation[coagulation$diet=="B","coag"])
shapiro.test(coagulation[coagulation$diet=="C","coag"])
shapiro.test(coagulation[coagulation$diet=="D","coag"])

# check variance equality
levene_test(coagulation,coag~diet)
# if the command above does not work, use the levene test from the car package
car::leveneTest(coag~diet, coagulation, center = "median")

# do anova
anova_diet <- aov(coagulation$coag~coagulation$diet)

summary(anova_diet)

# check pairwise

TukeyHSD(anova_diet)

pairwise.t.test(coagulation$coag,coagulation$diet,p.adj="bonf")
pairwise.t.test(coagulation$coag,coagulation$diet,p.adj="BH")
Loading

0 comments on commit 00a5c73

Please sign in to comment.