diff --git a/Figure_1.R b/Figure_1.R index dfd8686..e9c1125 100644 --- a/Figure_1.R +++ b/Figure_1.R @@ -7,7 +7,7 @@ # by Simon H. Martin, John W. Davey and Chris D. Jiggins # Simon Martin: shm45@cam.ac.uk # John Davey: jd626@cam.ac.uk -# May 2014 +# August 2014 #################################################################################### diff --git a/Figure_4.R b/Figure_4.R index 6ada34e..07e0d18 100644 --- a/Figure_4.R +++ b/Figure_4.R @@ -7,7 +7,7 @@ # by Simon H. Martin, John W. Davey and Chris D. Jiggins # John Davey: jd626@cam.ac.uk # Simon Martin: shm45@cam.ac.uk -# October-December 2013, May 2014 +# August 2014 big_table <- read.delim("model_files_win10000_s0.01_l5000_r50.alternate_models.dxy.summary.sg.tsv", sep = "\t") diff --git a/Figure_5.R b/Figure_5.R index 2499200..b8e6899 100755 --- a/Figure_5.R +++ b/Figure_5.R @@ -7,7 +7,7 @@ # by Simon H. Martin, John W. Davey and Chris D. Jiggins # Simon Martin: shm45@cam.ac.uk # John Davey: jd626@cam.ac.uk -# November-December 2013, May 2014 +# August 2014 library(optparse) suppressMessages(library(ggplot2)) diff --git a/Figure_3.R b/Figures_3_S3.R similarity index 78% rename from Figure_3.R rename to Figures_3_S3.R index 0fe0add..6b37f0f 100644 --- a/Figure_3.R +++ b/Figures_3_S3.R @@ -1,13 +1,13 @@ #!/usr/bin/env Rscript -# Figures_3_S2_S3.R +# Figures_3_S3.R # Plots for whole genome Heliconius data # Written for "Evaluating statistics for the identification of introgressed loci" # by Simon H. Martin, John W. Davey and Chris D. Jiggins # John Davey: jd626@cam.ac.uk # Simon Martin: shm45@cam.ac.uk -# May 2014 +# August 2014 ############## Functions ############### @@ -47,8 +47,8 @@ for (chr in c(1:20, "Z")) { #input data -ABBA_table <- read.csv("Heliconius_autosome_windows.csv", as.is = T) -ABBA_table_Z <- read.csv("Heliconius_Zchromosome_windows.csv", as.is = T) +ABBA_table <- read.csv("Heliconius_autosome_windows_5kb.csv", as.is = T) +ABBA_table_Z <- read.csv("Heliconius_Zchromosome_windows_5kb.csv", as.is = T) ABBA_table <- rbind(ABBA_table, ABBA_table_Z) #filter for windows with enough sites above the minimum data cutoff @@ -70,7 +70,7 @@ bdsub <- subset(ABBA_table, scaffold == "HE670865" & position >= 300000 & positi ###### plotting D and f against number of segregating sites -png(file = "Figure_3.png", width = 1500, height = 4500, res = 500) +png(file = "Figure_3_raw.png", width = 1500, height = 4500, res = 500) # Note that this figure was further modified using Inkscape. par(mfrow = c(3,1), mar = c(3,3,1,1)) @@ -121,7 +121,7 @@ dev.off() #### D and all fs agains pi -png(file = "Figure_S2.png", width = 3500, height = 3500, res = 400) +png(file = "Figure_S3.png", width = 3500, height = 3500, res = 400) par(mfrow = c(2,2), mar = c(3.5,3.5,1,1)) @@ -154,32 +154,3 @@ points(bdsub$mean_Pi[bdsub$D >= 0], bdsub$fd[bdsub$D >= 0], cex = 1, pch = 1, co dev.off() - - - - - - - -### variances in mean_Pi bins - -pi_cuts <- cut(mean_Pi, seq(0,0.06,0.01)) - -D_vars <- sapply(1:length(levels(pi_cuts)), function(x){var(ABBA_table$D[pi_cuts == levels(pi_cuts)[x] & ABBA_table$D >= 0], na.rm = T)}) -fd_vars <- sapply(1:length(levels(pi_cuts)), function(x){var(ABBA_table$fd[pi_cuts == levels(pi_cuts)[x] & ABBA_table$D >= 0] , na.rm = T)}) -fG_vars <- sapply(1:length(levels(pi_cuts)), function(x){var(ABBA_table$fG[pi_cuts == levels(pi_cuts)[x] & ABBA_table$D >= 0 & ABBA_table$fG >= 0 & ABBA_table$fG <= 1], na.rm = T)}) -fhom_vars <- sapply(1:length(levels(pi_cuts)), function(x){var(ABBA_table$fhom[pi_cuts == levels(pi_cuts)[x] & ABBA_table$D >= 0 & ABBA_table$fhom >= 0 & ABBA_table$fhom <= 1], na.rm = T)}) - -png("Figure_S3.png", width = 2500, height = 2500, res = 400) -par(mar = c(4,4,1,1)) - -plot(D_vars, type = "b", pch = 19, xlab = "Pi bin", xaxt = "n", ylab = "Variance", ylim = c(0,0.1)) -axis(1, at=1:6, labels = c("0 - 0.01", "0.01 - 0.02", "0.02 - 0.03", "0.03 - 0.04", "0.04 - 0.05", "0.05 - 0.06"), las = 1) -points(fd_vars, type = "b") -points(fG_vars, type = "b", pch = 2) -points(fhom_vars, type = "b", pch = 3) - -legend(5,0.1, legend = c(expression(italic("D")), expression(italic("f"["G"])), expression(italic("f"["hom"])), expression(italic("f"["d"]))), pch = c(19,1,2,3), bty = "n") - -dev.off() - diff --git a/compare_f_estimators.r b/compare_f_estimators.r index 3585794..fb12f64 100644 --- a/compare_f_estimators.r +++ b/compare_f_estimators.r @@ -1,13 +1,13 @@ #!/usr/bin/env Rscript -# compare_f_estimators.R -# Simulations to compare D statistic and f estimators and make Figure 2 and Figure S1. +# compare_f_estimators.r +# Simulations to compare D statistic and f estimators - Figure 2, Figure S1. # Written for "Evaluating the use of ABBA-BABA statistics to locate introgressed loci" # by Simon H. Martin, John W. Davey and Chris D. Jiggins # Simon Martin: shm45@cam.ac.uk # John Davey: jd626@cam.ac.uk -# May 2014 +# August 2014 @@ -15,6 +15,8 @@ library(phyclust) library(parallel) library(plyr) + + ms.to.DNAbin <- function(ms_output, nsam=0, inc_inv=FALSE, len=NULL) { snp.strings <- ms_output[(length(ms_output)-nsam+1):length(ms_output)] binary.mat.var <- t(matrix(as.numeric(unlist(sapply(snp.strings,strsplit, split = ""))),ncol = length(snp.strings))) @@ -198,9 +200,9 @@ nreps = 100 stats <- c("D","fG", "fhom","fd") -t = 100 -len = 10000 -rec_number = 100 +t = 50 +len = 5000 +rec_number = 5 tGF = c(0.1,0.5) scale=0.01 theta = paste("-t", t) @@ -293,11 +295,11 @@ sd_data_23_D0 <- apply(all_data_23_D0,c(1,3,4,5),sd,na.rm=T) # get std err for a ###################################################################################### -### plot data from seqgen simulations for Figure S1 +### plot data from seqgen simulations jit = c(-0.01,0.01) cols = c("red","blue") -#plot + png(file = paste("f_predictors", "_reps", nreps, "_l", len, "_r", rec_number, "_GF", paste(tGF, collapse = "_"), "_seqgen", "_scale", scale, ".png", sep = ""), width = 2000, height = 3500, res = 400) par(mfrow = c(4,2), mar = c(3.5,3.5,2,1)) @@ -383,10 +385,10 @@ dev.off() #################################################################################################### -### simplified diagram for Figure 2 +### simplified diagram for paper -png(file = "Figure_2.png", width = 2000, height = 2000, res = 400) +png(file = "Figure_2_raw.png", width = 2000, height = 2000, res = 400) par(mfrow = c(2,2), mar = c(2.25,2.25,0.25,0.25), ann = F) @@ -395,12 +397,15 @@ plot(0, xlim = c(0,1), ylim = c(0,1), cex = 0, xaxt="n", yaxt="n", bty = "o") abline(0,1,lty=2, lwd = 1.5, col = "gray") points(fs,mean_data_32[,"D",1,"seqgen"], xlim = c(0,1), ylim = c(0,1), pch = 19) segments(fs, mean_data_32[,"D",1,"seqgen"] - sd_data_32[,"D",1,"seqgen"], fs, mean_data_32[,"D",1,"seqgen"] + sd_data_32[,"D",1,"seqgen"], lwd = 1.5) +#axis(1,at=c(0,.5,1)) axis(2,at=c(0,.5,1)) plot(0, xlim = c(0,1), ylim = c(0,1), cex = 0, xaxt="n", yaxt="n", bty = "o") abline(0,1,lty=2, lwd = 1.5, col = "gray") points(fs,mean_data_23[,"D",1,"seqgen"], xlim = c(0,1), ylim = c(0,1), pch = 19) segments(fs, mean_data_23[,"D",1,"seqgen"] - sd_data_23[,"D",1,"seqgen"], fs, mean_data_23[,"D",1,"seqgen"] + sd_data_23[,"D",1,"seqgen"], lwd = 1.5) +#axis(1,at=c(0,.5,1)) +#axis(2,at=c(0,.5,1)) #fd @@ -416,7 +421,7 @@ abline(0,1,lty=2, lwd = 1.5, col = "gray") points(fs,mean_data_23[,"fd",1,"seqgen"], xlim = c(0,1), ylim = c(0,1), pch = 19) segments(fs, mean_data_23[,"fd",1,"seqgen"] - sd_data_23[,"fd",1,"seqgen"], fs, mean_data_23[,"fd",1,"seqgen"] + sd_data_23[,"fd",1,"seqgen"], lwd = 1.5) axis(1,at=c(0,.5,1)) - +#axis(2,at=c(0,.5,1)) dev.off() diff --git a/egglib_sliding_windows.py b/egglib_sliding_windows.py index 30469c0..a9f3785 100755 --- a/egglib_sliding_windows.py +++ b/egglib_sliding_windows.py @@ -7,7 +7,7 @@ # by Simon H. Martin, John W. Davey and Chris D. Jiggins # Simon Martin: shm45@cam.ac.uk # John Davey: jd626@cam.ac.uk -# October-December 2013, May 2014 +# August 2014 @@ -818,7 +818,7 @@ def ABBABABA(align, P1, P2, P3, P4, P3a = None, P3b = None): mainOut.write(",NA") if "ABBABABA" in analyses: - mainOut.write(",NA,NA,NA,NA,NA") + mainOut.write(",NA,NA,NA,NA,NA,NA") #and end the line mainOut.write("\n") diff --git a/generate_summary_statistics.R b/generate_summary_statistics.R index e0ea5e6..98a7e8c 100755 --- a/generate_summary_statistics.R +++ b/generate_summary_statistics.R @@ -7,7 +7,7 @@ # by Simon H. Martin, John W. Davey and Chris D. Jiggins # Simon Martin: shm45@cam.ac.uk # John Davey: jd626@cam.ac.uk -# November-December 2013, May 2014 +# November-December 2013, May 2014, August 2014 library(parallel) suppressMessages(library(reshape)) diff --git a/Table_S1.R b/model_results_table.R similarity index 95% rename from Table_S1.R rename to model_results_table.R index d4bc0f9..68334f8 100755 --- a/Table_S1.R +++ b/model_results_table.R @@ -1,13 +1,13 @@ #!/usr/bin/env Rscript -# Table_S1.R -# Generate table S1 of all model results from summary statistics +# model_results_table.R +# Generate table of all model results from summary statistics # Written for "Evaluating the use of ABBA-BABA statistics to locate introgressed loci" # by Simon H. Martin, John W. Davey and Chris D. Jiggins # Simon Martin: shm45@cam.ac.uk # John Davey: jd626@cam.ac.uk -# November-December 2013, May 2014 +# November-December 2013, May 2014, August 2014 library(plyr) @@ -100,4 +100,4 @@ get.model.summary<-function(fileprefix, filepostfix, recombval) { r50<-cbind("4Nr"=0.01, get.model.summary("model_files_win10000_s0.01_l5000_r50.", ".summary.sg.tsv", 50)) r5<-cbind("4Nr"=0.001,get.model.summary("model_files_win10000_s0.01_l5000_r5.", ".summary.sg.tsv", 5)) -write.table(rbind(r50,r5), file="Table_S1.tsv", sep="\t", quote=FALSE, row.names=FALSE) \ No newline at end of file +write.table(rbind(r50,r5), file="model_results_table.txt", sep="\t", quote=FALSE, row.names=FALSE) \ No newline at end of file diff --git a/Table_S1.tsv b/model_results_table.txt similarity index 100% rename from Table_S1.tsv rename to model_results_table.txt diff --git a/run_model_combinations.py b/run_model_combinations.py index c1a508f..2eb4e0d 100755 --- a/run_model_combinations.py +++ b/run_model_combinations.py @@ -8,7 +8,7 @@ # Simon Martin: shm45@cam.ac.uk # John Davey: jd626@cam.ac.uk # November-December 2013 -# Revised February-May 2014 +# Revised February-May 2014, August 2014 import argparse from math import floor diff --git a/shared_ancestry_simulator.R b/shared_ancestry_simulator.R index 676e289..66205f9 100755 --- a/shared_ancestry_simulator.R +++ b/shared_ancestry_simulator.R @@ -8,7 +8,7 @@ # by Simon H. Martin, John W. Davey and Chris D. Jiggins # Simon Martin: shm45@cam.ac.uk # John Davey: jd626@cam.ac.uk -# October-December 2013, May 2014 +# October-December 2013, May 2014, August 2014 library(tools) library(parallel)