Deployed 197f9fd with MkDocs version: 1.6.1

sib-swiss · Jan 7, 2025 · 7a0359f · 7a0359f
commit 7a0359f
Show file tree

Hide file tree

Showing 108 changed files with 24,977 additions and 0 deletions.
diff --git a/.nojekyll b/.nojekyll
diff --git a/404.html b/404.html
diff --git a/assets/Scripts/Day1.R b/assets/Scripts/Day1.R
@@ -0,0 +1,171 @@
+#-----------------
+#-----------------
+# Introduction to statistics with R
+# 2024
+# In Lausanne 
+#-----------------
+#-----------------
+
+
+
+#-----------------
+#-----------------
+# 1st exercise
+#-----------------
+
+library(ISwR)
+?hellung
+data(hellung) 
+
+summary(hellung)
+
+glucose <- hellung$glucose
+mean(hellung$glucose)
+sd(hellung$glucose)
+mean(hellung$conc)
+sd(hellung$conc)
+mean(hellung$diameter)
+sd(hellung$diameter)
+
+means.hellung <- c()
+for (i in 1:3){
+  means.hellung <- c(means.hellung, mean(hellung[,i]))
+  print(means.hellung)
+}
+means.hellung
+
+stdev.hellung <- c()
+for (i in 1:3){
+  stdev.hellung <- c(stdev.hellung, sd(hellung[,i]))
+}
+stdev.hellung
+
+par(mfrow=c(2,2)) # for viewing multiple plots (2 rows x 2 columns = 4 plots)
+hist(hellung$conc, main="Hellung data", xlab="concentration", ylab="freq")
+hist(hellung$diameter, col="red")
+hist(hellung$glucose)
+hist(hellung$conc, breaks=20)
+
+par(mfrow=c(1,2)) # for viewing multiple plots
+boxplot( conc ~ glucose,data=hellung)
+boxplot(diameter ~ glucose, data=hellung)
+boxplot(hellung$conc ~ hellung$glucose)
+boxplot(hellung$diameter ~ hellung$glucose)
+
+
+cor(hellung$conc, hellung$diameter)
+plot(diameter ~ conc, data=hellung) 
+cor(log(hellung$conc), hellung$diameter)
+plot(diameter ~ log(conc), data=hellung) 
+
+# a fancy plot with ggplot :)
+library(ggplot2)
+ggplot(hellung, aes(x=log(conc), y=diameter)) + geom_point()
+
+
+
+#-----------------
+#-----------------
+# 2nd exercise
+#-----------------
+
+setwd("/Users/Rachel/Desktop/Introduction-to-statistics-with-R/docs/assets/exercises/")
+data <- read.csv("data.csv") 
+
+
+data
+summary(data)
+sd(data[,1]); sd(data[,2]); sd(data[,3])
+
+datatoplot <- data[,1]
+#pdf("datanumber1.pdf")
+## Plot 4 rows of graphs on one plot
+par(mfrow=c(4,1))
+
+# 1st plot: individual points on the x-axis; random noise on the
+#           y-axis so that points are not too much superimposed
+plot(datatoplot, runif( length(datatoplot), -1, 1), xlim=range(datatoplot))
+
+# 2nd plot: histogram, with the density line superimposed
+hist(datatoplot, xlim=range(datatoplot), breaks=20)
+lines(density(datatoplot))
+
+# 3rd plot: average +/- Sd
+plot(mean(datatoplot), 0, xlim=range(datatoplot), main="Mean and standard deviation of a")
+arrows(mean(datatoplot)-sd(datatoplot), 0, mean(datatoplot)+sd(datatoplot), 0, angle=90, code=3)
+
+# 4th plot: boxplot
+boxplot(datatoplot, horizontal=TRUE, ylim=range(datatoplot))
+#dev.off()
+
+## now I am looking at the second column
+
+datatoplot <- data[,2]
+#pdf("datanumber2.pdf")
+## Plot 4 rows of graphs on one plot
+par(mfrow=c(4,1))
+
+# 1st plot: individual points on the x-axis; random noise on the
+#           y-axis so that points are not too much superimposed
+plot(datatoplot, runif( length(datatoplot), -1, 1), xlim=range(datatoplot))
+
+# 2nd plot: histogram, with the density line superimposed
+hist(datatoplot, xlim=range(datatoplot), breaks=20)
+lines(density(datatoplot))
+
+# 3rd plot: average +/- Sd
+plot(mean(datatoplot), 0, xlim=range(datatoplot), main="Mean and standard deviation of a")
+arrows(mean(datatoplot)-sd(datatoplot), 0, mean(datatoplot)+sd(datatoplot), 0, angle=90, code=3)
+
+# 4th plot: boxplot
+boxplot(datatoplot, horizontal=TRUE, ylim=range(datatoplot))
+#dev.off()
+
+## now I am looking at the third column
+
+datatoplot <- data[,3]
+#pdf("datanumber3.pdf")
+## Plot 4 rows of graphs on one plot
+par(mfrow=c(4,1))
+
+# 1st plot: individual points on the x-axis; random noise on the
+#           y-axis so that points are not too much superimposed
+plot(datatoplot, runif( length(datatoplot), -1, 1), xlim=range(datatoplot))
+
+# 2nd plot: histogram, with the density line superimposed
+hist(datatoplot, xlim=range(datatoplot), breaks=20)
+lines(density(datatoplot))
+
+# 3rd plot: average +/- Sd
+plot(mean(datatoplot), 0, xlim=range(datatoplot), main="Mean and standard deviation of a")
+arrows(mean(datatoplot)-sd(datatoplot), 0, mean(datatoplot)+sd(datatoplot), 0, angle=90, code=3)
+
+# 4th plot: boxplot
+boxplot(datatoplot, horizontal=TRUE, ylim=range(datatoplot))
+#dev.off()
+
+
+## Last exercise
+
+
+student <- read.table("students.csv",header=TRUE,sep=",")
+student[,8]<- as.numeric(gsub(",",".",student[,8]))
+student[,1]<- as.factor(student[,1])
+student[,5]<- as.factor(student[,5])
+student[,6]<- as.factor(student[,6])
+student[student[,"leftrighthanded"] =="D","leftrighthanded"] <- "R"
+student[student[,"leftrighthanded"] =="G","leftrighthanded"] <- "L"
+
+student[student[,"smoker"] =="NS ","smoker"] <- "NS"
+student[student[,"smoker"] =="nS","smoker"] <- "NS"
+student$smoker <- factor(student$smoker,levels=unique(student$smoker))
+student$leftrighthanded <- as.factor(as.character(student$leftrighthanded))
+summary(student)
+
+student[student[,"height"] ==1.77,"height"] <-177
+
+plot(student$height,student$weight,col=student$gender)
+boxplot(height~gender,data=student)
+boxplot(weight~gender,data=student)
+hist(student$weight)
+
diff --git a/assets/Scripts/Day2.R b/assets/Scripts/Day2.R
@@ -0,0 +1,225 @@
+#-----------------
+#-----------------
+# Introduction to statistics with R
+# January 2024
+# In Lausanne 
+#-----------------
+#-----------------
+
+library(faraway)
+library(ggpubr)
+library(ISwR)
+library(rstatix)
+library(tidyverse)
+
+
+
+#-----------------
+#-----------------
+#-----------------
+# 1st exercise
+#-----------------
+
+data(intake)
+?intake
+attach(intake)
+intake 
+names(intake)
+summary(intake)
+mean(intake$pre)
+sd(intake$pre)
+
+# Assumption: no significant outliers in the data
+
+ggboxplot(intake$pre, width = 0.5, add = c("mean","jitter"), 
+          ylab = "premenstrual intake", xlab = F)
+
+identify_outliers(as.data.frame(intake$pre))
+
+# Assumption: normality
+qqplot
+qqline
+ggqqplot(intake,"pre")
+
+shapiro_test(intake$pre)
+
+# t test
+
+t.test(pre, mu=7725)
+t.test(pre, mu=7725, alternative="less")
+
+
+
+#-----------------
+#-----------------
+# 2nd exercise: two samples t-test 
+#-----------------
+
+data(energy)
+?energy
+energy 
+
+# assumption 1: data in each group are normally distributed.
+
+ind.obese <- which(energy$stature == "obese")
+ind.lean <- which(energy$stature == "lean")
+
+shapiro_test(energy$expend[ind.obese]) 
+shapiro_test(energy$expend[ind.lean])
+
+# assumption 2: the variances for the two independent groups are equal.
+
+levene_test(energy, expend~stature)
+# if the command above does not work, use the levene test from the car package
+car::leveneTest(expend~stature, energy, center = "median")
+
+# t test
+
+t.test(energy$expend ~ energy$stature,var.equal=TRUE)
+
+t.test(energy$expend ~ energy$stature)
+
+t.test(energy$expend[ind.obese],energy$expend[ind.lean])
+t.test(expend~stature,data=energy)
+
+ggqqplot(energy[ind.lean,], "expend")
+identify_outliers(as.data.frame(energy[ind.lean,"expend"]))
+
+#-----------------
+#-----------------
+# 3rd exercise: paired t-test
+#-----------------
+
+data(intake)
+?intake
+attach(intake)
+intake 
+
+# assumption 1: Each of the paired measurements must be obtained from the same subject
+# check your sampling design !
+
+# assumption 2: The measured differences are normally distributed.
+
+intake.diff <- intake$post - intake$pre
+#intake.diff.df <- as.data.frame(intake.diff)
+
+shapiro_test(intake.diff) 
+
+# t test
+
+t.test(intake$pre, intake$post, paired = T)
+
+
+
+#-----------------
+#-----------------
+# 4th exercise: simulations of datasets
+#-----------------
+## change these how you want!
+#rnorm
+KO <- runif(10, min=27, max=34)
+WT <- runif(10, min=27, max=34)
+KO <- as.data.frame(KO)
+names(KO) <- "weight"
+KO$genotype <- "KO"
+WT <- as.data.frame(WT)
+names(WT) <- "weight"
+WT$genotype <- "WT"
+
+KO_WT <- rbind(KO,WT)
+
+boxplot(KO_WT$weight ~ KO_WT$genotype, main="Mice weight at 18 weeks", xlab="", ylab="")
+
+res.welch.test <- t.test(KO_WT$weight ~ KO_WT$genotype)
+res.t.test <- t.test(KO_WT$weight ~ KO_WT$genotype, var.equal = T)
+
+res.welch.test
+res.t.test
+
+res.welch.test$p.value
+res.t.test$p.value
+
+sim.p.welch.test <- NULL
+sim.p.t.test <- NULL
+sim.p.wilcox.test <- NULL
+
+for (i in 1:1000) {
+  KO <- runif(3, min=27, max=29)
+  WT <- runif(3, min=29, max=34)
+  KO <- as.data.frame(KO)
+  names(KO)[1] <- "weight"
+  KO$genotype <- "KO"
+  WT <- as.data.frame(WT)
+  names(WT)[1] <- "weight"
+  WT$genotype <- "WT"
+
+  KO_WT <- rbind(KO,WT)
+
+  res.welch.test <- t.test(KO_WT$weight ~ KO_WT$genotype)
+  res.t.test <- t.test(KO_WT$weight ~ KO_WT$genotype, var.equal = T)
+  res.wilcox.test <- wilcox.test(KO_WT$weight ~ KO_WT$genotype)
+  sim.p.welch.test <- c(sim.p.welch.test, res.welch.test$p.value)
+  sim.p.t.test <- c(sim.p.t.test, res.t.test$p.value)
+  sim.p.wilcox.test <- c(sim.p.wilcox.test,res.wilcox.test$p.value)
+  }
+
+str(sim.p.welch.test)
+str(sim.p.t.test)
+sum(sim.p.welch.test < 0.05)
+sum(sim.p.t.test < 0.05)
+plot(sim.p.t.test,sim.p.welch.test)
+adj.bonf <- p.adjust(sim.p.welch.test, method="bonf")
+sum(adj.bonf < 0.05)
+adj.BH <- p.adjust(sim.p.welch.test, method="BH")
+sum(adj.BH < 0.05)
+
+
+
+#-----------------
+#-----------------
+# 5th exercise: ANOVA
+#-----------------
+
+data(coagulation)
+
+# check the data
+summary(coagulation)
+
+get_summary_stats(group_by(coagulation,diet),coag, type = "mean_sd") 
+
+
+coagulation %>% group_by(diet) %>% get_summary_stats(coag, type = "mean_sd") 
+
+boxplot(coagulation$coag ~ coagulation$diet)
+
+ggboxplot(coagulation, x="diet",y="coag")
+
+# check normality
+ind.A <- which(coagulation$diet=="A")
+?ggqqplot
+ggqqplot(coagulation[ind.A,], "coag")
+ggqqplot(coagulation[coagulation$diet=="B",], "coag")
+ggqqplot(coagulation[coagulation$diet=="C",], "coag")
+ggqqplot(coagulation[coagulation$diet=="D",], "coag")
+
+shapiro.test(coagulation[coagulation$diet=="A","coag"])
+shapiro.test(coagulation[coagulation$diet=="B","coag"])
+shapiro.test(coagulation[coagulation$diet=="C","coag"])
+shapiro.test(coagulation[coagulation$diet=="D","coag"])
+
+# check variance equality
+levene_test(coagulation,coag~diet)
+# if the command above does not work, use the levene test from the car package
+car::leveneTest(coag~diet, coagulation, center = "median")
+
+# do anova
+anova_diet <- aov(coagulation$coag~coagulation$diet)
+
+summary(anova_diet)
+
+# check pairwise 
+
+TukeyHSD(anova_diet)
+
+pairwise.t.test(coagulation$coag,coagulation$diet,p.adj="bonf")
+pairwise.t.test(coagulation$coag,coagulation$diet,p.adj="BH")