Skip to content

Commit

Permalink
change_script_2025
Browse files Browse the repository at this point in the history
  • Loading branch information
jeitziner committed Jan 31, 2025
1 parent 1e3b132 commit c117a9e
Show file tree
Hide file tree
Showing 5 changed files with 292 additions and 20 deletions.
36 changes: 29 additions & 7 deletions docs/assets/Scripts/Day1.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
#-----------------
#-----------------
# Introduction to statistics with R
# 2024
# 2025
# In Lausanne
#-----------------
#-----------------


weight <- c(65,72,55,91,95,72) #this is the weight of our classroom people
height <- c(1.73, 1.80, 1.62, 1.90, 1.78, 1.93)
bmi <- weight / height^2
bmi # Type this in R to see the computed values

#-----------------
#-----------------
Expand Down Expand Up @@ -60,23 +63,24 @@ plot(diameter ~ log(conc), data=hellung)

# a fancy plot with ggplot :)
library(ggplot2)
ggplot(hellung, aes(x=log(conc), y=diameter)) + geom_point()

ggplot(hellung, aes(x=log(conc), y=diameter,col=glucose)) + geom_point()

hellung$glucose <- factor(hellung$glucose)

#-----------------
#-----------------
# 2nd exercise
#-----------------
data <- read.csv("~/Desktop/Introduction-to-statistics-with-R/docs/assets/exercises/data.csv", header=FALSE)

setwd("/Users/Rachel/Desktop/Introduction-to-statistics-with-R/docs/assets/exercises/")
setwd("/Users/rachelmarcone/Desktop/Introduction-to-statistics-with-R/docs/assets/exercises/")
data <- read.csv("data.csv")


data
summary(data)
sd(data[,1]); sd(data[,2]); sd(data[,3])

attach(data)
datatoplot <- data[,1]
#pdf("datanumber1.pdf")
## Plot 4 rows of graphs on one plot
Expand Down Expand Up @@ -145,6 +149,23 @@ boxplot(datatoplot, horizontal=TRUE, ylim=range(datatoplot))
#dev.off()


x <- rnorm(10000,mean=0, sd=1)
hist(x)
t.test(x,mu=0)$p.value


s <- rep(0, 100) # vector of 100 zeros; it will hold one p-value per simulation
for (i in seq_along(s)) { # this is a loop, called a "for" loop: it repeats
  # everything between the braces 100 times, changing the
  # variable i from 1 to 100 (one iteration per entry of s)
  x <- rnorm(10, mean = 10, sd = 1) # draw a sample of 10 values from N(10, 1)
  s[i] <- t.test(x, mu = 10)$p.value # does a t.test against the true mean, then
  # takes the p.value obtained and puts it
  # into the i-th entry of s
}

s_adj <- p.adjust(s)

## Last exercise


Expand All @@ -163,9 +184,10 @@ student$leftrighthanded <- as.factor(as.character(student$leftrighthanded))
summary(student)

student[student[,"height"] ==1.77,"height"] <-177
student[,"siblings"]<- as.factor(student[,"siblings"])

plot(student$height,student$weight,col=student$gender)
boxplot(height~gender,data=student)
boxplot(weight~gender,data=student)
hist(student$weight)

pairs(student) ## two by two plots of data
33 changes: 21 additions & 12 deletions docs/assets/Scripts/Day2.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#-----------------
#-----------------
# Introduction to statistics with R
# January 2024
# January 2025
# In Lausanne
#-----------------
#-----------------
Expand Down Expand Up @@ -37,16 +37,18 @@ ggboxplot(intake$pre, width = 0.5, add = c("mean","jitter"),
identify_outliers(as.data.frame(intake$pre))

# Assumption: normality
qqplot
qqline

qqnorm(intake$pre)
qqline(intake$pre)

ggqqplot(intake,"pre")

shapiro_test(intake$pre)

# t test

t.test(pre, mu=7725)
t.test(pre, mu=7725, alternative="less")
t.test(pre, mu=7725) #one sample two sided t test
t.test(pre, mu=7725, alternative="less") #one sample one sided t test



Expand All @@ -61,12 +63,15 @@ energy

# assumption 1: data in each group are normally distributed.

ind.obese <- which(energy$stature == "obese")
ind.obese <- which(energy[,"stature"] == "obese")
ind.lean <- which(energy$stature == "lean")

shapiro_test(energy$expend[ind.obese])
qqnorm(energy$expend[ind.obese])
qqline(energy$expend[ind.obese])
shapiro_test(energy$expend[ind.lean])

qqnorm(energy$expend[ind.lean])
qqline(energy$expend[ind.lean])
# assumption 2: the variances for the two independent groups are equal.

levene_test(energy, expend~stature)
Expand Down Expand Up @@ -127,6 +132,8 @@ names(WT) <- "weight"
WT$genotype <- "WT"

KO_WT <- rbind(KO,WT)
shapiro.test(WT$weight)
shapiro.test(KO$weight)

boxplot(KO_WT$weight ~ KO_WT$genotype, main="Mice weight at 18 weeks", xlab="", ylab="")

Expand All @@ -144,13 +151,13 @@ sim.p.t.test <- NULL
sim.p.wilcox.test <- NULL

for (i in 1:1000) {
KO <- runif(3, min=27, max=29)
WT <- runif(3, min=29, max=34)
KO <- runif(3, min=30, max=34)
WT <- runif(3, min=27, max=29)
KO <- as.data.frame(KO)
names(KO)[1] <- "weight"
names(KO) <- "weight"
KO$genotype <- "KO"
WT <- as.data.frame(WT)
names(WT)[1] <- "weight"
names(WT) <- "weight"
WT$genotype <- "WT"

KO_WT <- rbind(KO,WT)
Expand All @@ -167,7 +174,9 @@ str(sim.p.welch.test)
str(sim.p.t.test)
sum(sim.p.welch.test < 0.05)
sum(sim.p.t.test < 0.05)
sum(sim.p.wilcox.test<0.05)
plot(sim.p.t.test,sim.p.welch.test)

adj.bonf <- p.adjust(sim.p.welch.test, method="bonf")
sum(adj.bonf < 0.05)
adj.BH <- p.adjust(sim.p.welch.test, method="BH")
Expand All @@ -192,7 +201,7 @@ coagulation %>% group_by(diet) %>% get_summary_stats(coag, type = "mean_sd")

boxplot(coagulation$coag ~ coagulation$diet)

ggboxplot(coagulation, x="diet",y="coag")
ggboxplot(coagulation, x="diet",y="coag",add="jitter")

# check normality
ind.A <- which(coagulation$diet=="A")
Expand Down
144 changes: 144 additions & 0 deletions docs/assets/Scripts/Day3_afternoon_multiple_regression.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
#####################################
# Introduction to Statistics with R #
# Multiple Regression #
# Joao Lourenco #
# 29.01.2025 #
#####################################

# Start from a clean session: remove every object from the workspace ...
rm(list = ls())

# ... and send a form feed to the console (clears the RStudio command line)
cat("\014")


############################################
# Multiple regression with two variables   #
############################################

# Read the class dataset into a data frame
class <- read.csv(
  file = "/Users/rachelmarcone/Desktop/Introduction-to-statistics-with-R/docs/assets/exercises/class.csv"
)

# Height explained by two independent variables at once
model <- lm(Height ~ Age + Weight, data = class)
summary(model)

# Swapping the order of the predictors gives the same fit
summary(lm(Height ~ Weight + Age, data = class))

# For comparison: one model per predictor, each considered on its own
summary(lm(Height ~ Age, data = class))
summary(lm(Height ~ Weight, data = class))


# Example of a case in which the adjusted R2 decreases when the number of
# variables increases: every predictor below is pure noise, unrelated to y.

y <- rnorm(10) # create a random response vector (10 observations)
X <- matrix(rnorm(100), ncol = 10, nrow = 10) # create a random 10 x 10 matrix

# Model sizes to try: y regressed on the first 1, 2, ..., 10 columns of X
n_predictors <- seq_len(ncol(X))

# r-squared of each model; vapply() guarantees exactly one number per model
r_squared <- vapply(
  n_predictors,
  function(i) summary(lm(y ~ X[, seq_len(i)]))$r.squared,
  numeric(1)
)
plot(n_predictors, r_squared, type = "l",
     xlab = "Number of predictors", ylab = "R-squared")

# adjusted r-squared of the same models
# (with only 10 observations, the largest models leave no residual degrees of
# freedom, so their adjusted value is not meaningful and is not drawn)
adj_r_squared <- vapply(
  n_predictors,
  function(i) summary(lm(y ~ X[, seq_len(i)]))$adj.r.squared,
  numeric(1)
)
plot(n_predictors, adj_r_squared, type = "l",
     xlab = "Number of predictors", ylab = "Adjusted R-squared")

# last model: all the columns of X at once
summary(lm(y ~ X))

#########################################################
# Categorical variables, dummy variables and contrasts #
#########################################################

## Categorical variable

# Store Gender as a factor, then look at its levels and their integer codes
class$Gender <- as.factor(class$Gender)
class$Gender
as.numeric(class$Gender)

summary(lm(Height ~ Age + Gender, data = class))


# Difference between the mean heights of the Gender groups
# (see ?tapply and ?diff)
means <- with(class, tapply(Height, Gender, mean))
diff(means)

# Where does this difference come from? Compare the Gender coefficient
# without and with Age in the model.
summary(lm(Height ~ Gender, data = class))
summary(lm(Height ~ Age + Gender, data = class))


## Interaction between Age and Gender
summary(lm(Height ~ Age + Gender + Age:Gender, data = class))
# shorthand for the same model:
# summary(lm(Height ~ Age * Gender, data = class))


## What if males were the baseline?

# New categorical variable with "M" as the reference level
class$Gender1 <- relevel(class$Gender, ref = "M")

# Refit with the releveled factor
summary(lm(Height ~ Age + Gender1, data = class))



###########################
# Diagnostic tools #
###########################

# The columns of `class` are always reached through `data = class` or
# `class$...`; nothing is attached to the search path, so it stays explicit
# where Age and Height come from and no other object gets masked.


## examination of residuals
model <- lm(Height ~ Age, data = class)
# residuals against the single predictor: works only for simple regression
plot(class$Age, residuals(model))
# residuals against the fitted values: also usable with several predictors
plot(fitted(model), residuals(model))

# hat values (leverages)
?lm.influence
hat <- lm.influence(model)
n_obs <- length(hat$hat)              # number of observations in the fit (n)
n_coef <- length(coef(model))         # number of estimated coefficients (p)
mean_leverage <- n_coef / n_obs       # the average hat value is p / n
# common rules of thumb flag points above 2 or 3 times the average leverage
plot(hat$hat, ylim = range(0, hat$hat, 3 * mean_leverage))
abline(h = c(2, 3) * mean_leverage, lty = 2, col = c("blue", "red"))


# Predictions with confidence intervals
?predict.lm
# without newdata the intervals are computed at the observed Age values
preds <- predict(model, interval = "prediction")

# Plot the data
plot(class$Age, class$Height, xlab = "Age", ylab = "Height")

# Add regression line
abline(model, col = "red", lwd = 2)




# Generate new predictor values for smooth plotting
new_x <- seq(min(class$Age), max(class$Age), length.out = 100)

# get the prediction interval
prediction_interval <- predict(model,
                               newdata = data.frame(Age = new_x),
                               interval = "prediction")

# get the confidence intervals for a given level of confidence
confidence_interval <- predict(model,
                               newdata = data.frame(Age = new_x),
                               interval = "confidence")

# Create the scatterplot, with a y-axis wide enough for the prediction band
plot(class$Age, class$Height,
     xlab = "Age", ylab = "Height",
     ylim = range(prediction_interval[, "lwr"], prediction_interval[, "upr"]))

# Add regression line
abline(model, col = "red", lwd = 2)


# Add bands: dashed = confidence interval, dotted = prediction interval
lines(new_x, confidence_interval[, "lwr"], lty = "dashed")
lines(new_x, confidence_interval[, "upr"], lty = "dashed")
lines(new_x, prediction_interval[, "lwr"], lty = "dotted")
lines(new_x, prediction_interval[, "upr"], lty = "dotted")
Loading

0 comments on commit c117a9e

Please sign in to comment.