Skip to content

Commit

Permalink
Merge branch 'prerelease-dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
Chris Ulpinnis committed Mar 26, 2019
2 parents e6bd61e + b7d6996 commit 49f7633
Show file tree
Hide file tree
Showing 49 changed files with 6,217 additions and 400 deletions.
3 changes: 3 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
^.*\.Rproj$
^\.Rproj\.user$
^\.travis\.yml$
^binder/*$
^notebooks/*$
^LICENSE$
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
Package: GoldenMutagenesis
Type: Package
Encoding: UTF-8
Title: A tool to generate primers for Golden Gate Cloning
Version: 1.0.1
Date: 2018-10-12
Title: A tool to calculate and validate primers for Golden Gate Cloning
Version: 1.1.0
Date: 2019-01-21
Author: Chris Ulpinnis & Pascal Püllmann
Maintainer: Chris Ulpinnis <[email protected]>
Description: The Golden Gate cloning technique has been proven to be a highly efficient toolbox for a variety of cloning setups. Based on its modular concept it is particularly suitable for the use in multiple-site mutagenesis approaches. In this technical note we developed a protocol termed Golden Mutagenesis for the rapid, easy, reliable and cheap formation of mutagenesis libraries. One to five positions could be altered in parallel or simultaneously within two days. To facilitate the implementation of this technique, this R-library has been developed for the automated primer design and the graphical evaluation of sequencing results to determine the quality of the library.
Expand Down
2 changes: 1 addition & 1 deletion GoldenMutagenesis.Rproj
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ LaTeX: pdfLaTeX
BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageRoxygenize: rd,collate,namespace
PackageRoxygenize: rd,collate,namespace,vignette
3 changes: 2 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@ export(domesticate)
export(get_cu_table)
export(list_cu_table)
export(msd_mutate)
export(mutate)
export(mutate_spm)
export(primer_add_level)
export(primer_prepare_level)
export(print_primer)
import(RColorBrewer)
import(methods)
Expand Down
5 changes: 3 additions & 2 deletions R/classes.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# S4 class definitions used by the primer design functions.
# NOTE(review): "Primer" and "Primer_MSD" are each defined twice below; when
# evaluated top to bottom the later setClass() call replaces the earlier one.

# Primerset: the original sequence, a list of primers and the resulting sequence.
ps<-setClass("Primerset", slots=c(oldsequence="character", primers="list", newsequence="character"))
# Primer (earlier definition, without the "extra" slot).
pc<-setClass("Primer", slots=c(prefix="character" ,restriction_enzyme="character", suffix="character", vector="character", overhang="character" ,binding_sequence="character", temperature="numeric", difference="numeric"))
# Primer_MSD (earlier definition): a Primer with an additional "NDT" character slot.
pc_msd<-setClass("Primer_MSD", contains="Primer", slots=c(NDT="character"))
# Primer (later definition): same slots plus "extra", a character slot placed
# between the overhang and the binding sequence.
pc<-setClass("Primer", slots=c(prefix="character" ,restriction_enzyme="character", suffix="character", vector="character", overhang="character", extra="character" ,binding_sequence="character", temperature="numeric", difference="numeric"))
# Primer_MSD (later definition): inherits every slot from Primer, adds none.
pc_msd<-setClass("Primer_MSD", contains="Primer")
# Primer_SPM: inherits every slot from Primer, adds none.
pc_spm<-setClass("Primer_SPM", contains="Primer")
# Fragment: numeric start/stop positions plus vectors of start/stop mutations.
fragment<-setClass("Fragment", slots=c(start="numeric", stop="numeric", start_mutation="vector", stop_mutation="vector"))
# Extended_Primerset: a Primerset that additionally carries a list of fragments.
eps<-setClass("Extended_Primerset", contains="Primerset", slots=c(fragments="list"))
475 changes: 385 additions & 90 deletions R/exported_functions.R

Large diffs are not rendered by default.

167 changes: 129 additions & 38 deletions R/functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,12 @@ get_cu_table<-function(name, list=T) {



# Estimate the melting temperature of an oligo with the GC-content formula
#
#   Tm = 100.5 + 41 * (G + C) / (A + T + G + C) - 820 / (A + T + G + C)
#        + 16.6 * log10([Na+])
#
# x                     A single string with the oligo sequence. Only the upper
#                       case letters A, C, G and T are counted.
# salt_concentration    Na+ concentration; it is divided by 1000 before the
#                       logarithm is taken (presumably millimolar -> molar).
# primer_concentration  Not used by this formula; accepted so the function can
#                       be called with the same arguments as calculate_tm_nnb.
# offset                Position of the first base that is taken into account;
#                       bases in front of it are ignored. 0 and 1 both keep the
#                       whole sequence.
#
# Returns the melting temperature as a single numeric value.
calculate_tm <- function(x, salt_concentration = 50, primer_concentration = 50, offset = 0) {
  bases <- strsplit(x, "", fixed = TRUE)[[1]]
  # Index 0 is silently dropped by R, so offset = 0 keeps every base.
  bases <- bases[offset:length(bases)]
  # Count on the trimmed bases. Counting on the untrimmed input again would
  # make `offset` a no-op.
  gc_count <- sum(bases %in% c("G", "C"))
  base_count <- sum(bases %in% c("A", "C", "G", "T"))
  tm <- 100.5 + (41 * gc_count / base_count) - (820 / base_count) +
    16.6 * log10(salt_concentration / 1000)
  return(as.numeric(tm))
}

Expand Down Expand Up @@ -145,7 +145,25 @@ calculate_DeltaS<-function(x){
return(s)
}

calculate_tm_nnb<-function(oligo_sequence, primer_concentration=50, salt_concentration=50, offset=9){
#' Calculate the melting temperature using the nearest-neighbor method
#'
#' The implementation is based on the explanations of \url{http://biotools.nubic.northwestern.edu/OligoCalc.html}.
#'
#' More details at \url{https://doi.org/10.1093/nar/gkm234}
#'
#' @param oligo_sequence A string containing an oligo sequence.
#' @param primer_concentration The concentration of the primer in nanomolar (nM) [default: 50]
#' @param salt_concentration The concentration of Na+ in nanomole [default: 50]
#' @param offset You can skip a prefix of your oligo sequence with this parameter. The first n bases are not considered in the calculation. [default: 0]
#' @return The melting temperature in degrees Celsius.
#'
#' @examples
#' \dontrun{
#' GoldenMutagenesis::calculate_tm_nnb("AAAAAATGGTGTGTGATGTGTCCCTCTATC")
#' }
#'
calculate_tm_nnb<-function(oligo_sequence, primer_concentration=50, salt_concentration=50, offset=0){
oligo_sequence_s2c<-s2c(oligo_sequence)
oligo_sequence<-paste(oligo_sequence_s2c[offset:length(oligo_sequence_s2c)], collapse="")
K<-1/(primer_concentration*1e-9) #Convert from nanomoles to moles
Expand All @@ -157,41 +175,91 @@ calculate_tm_nnb<-function(oligo_sequence, primer_concentration=50, salt_concent
}


# Generic: shorten the binding sequence of a primer so that its melting
# temperature is as close as possible to `target_temp`.
#
# primer       A Primer object (or a subclass of it).
# temp_func    Function computing a melting temperature; it is called as
#              temp_func(sequence, offset = 0).
# primer_min   Minimal length of the binding sequence in codons (multiplied by
#              three to obtain bases).
# target_temp  Melting temperature to approach.
# gc_filter    If TRUE, prefer candidates whose last five bases contain two or
#              three A/T.
#
# Returns the primer with updated binding_sequence, temperature and difference.
setGeneric("sequence_length_temperature", function(primer, temp_func = calculate_tm_nnb, primer_min = 3, target_temp = 60, gc_filter = FALSE) {
  standardGeneric("sequence_length_temperature")
})

# Internal helper shared by the methods below.
#
# Evaluates every prefix of `bases` that is at least `first_length` bases long
# and picks the one whose melting temperature is closest to `target_temp`.
# Returns a list with the chosen `sequence`, its `temperature` and the absolute
# `difference` to the target.
pick_binding_prefix <- function(bases, first_length, temp_func, target_temp, gc_filter) {
  # A start beyond the end of the sequence would make `a:b` count downwards
  # and index past the available bases, so fall back to the full sequence.
  first_length <- min(first_length, length(bases))
  prefix_lengths <- first_length:length(bases)
  prefixes <- vapply(
    prefix_lengths,
    function(n) paste(bases[seq_len(n)], collapse = ""),
    character(1)
  )
  temperatures <- vapply(
    prefixes,
    function(prefix) as.numeric(temp_func(prefix, offset = 0)),
    numeric(1),
    USE.NAMES = FALSE
  )
  differences <- abs(temperatures - target_temp)

  eligible <- seq_along(prefixes)
  if (gc_filter) {
    # Keep only candidates with two or three A/T among the last five bases.
    at_count <- str_count(str_sub(prefixes, start = -5), "A|T")
    balanced <- which(at_count >= 2 & at_count < 4)
    if (length(balanced) == 0) {
      warning("The end (last five bases) of the binding sequence is not optimal. The primers are maybe inefficient.")
    } else {
      eligible <- balanced
    }
  }

  # which.min() returns the first minimum, so a tie is resolved in favour of
  # the shortest prefix and exactly one candidate is selected.
  best <- eligible[which.min(differences[eligible])]
  list(
    sequence = prefixes[best],
    temperature = temperatures[best],
    difference = differences[best]
  )
}

# Primer: the candidates are prefixes of the binding sequence itself.
setMethod("sequence_length_temperature", signature(primer = "Primer"),
  function(primer, temp_func = calculate_tm_nnb, primer_min = 3, target_temp = 60, gc_filter = FALSE) {
    best <- pick_binding_prefix(
      bases = s2c(primer@binding_sequence),
      first_length = primer_min * 3,
      temp_func = temp_func,
      target_temp = target_temp,
      gc_filter = gc_filter
    )
    primer@binding_sequence <- best$sequence
    primer@temperature <- best$temperature
    primer@difference <- best$difference
    return(primer)
  }
)

# Primer_MSD: no special handling, delegates to the Primer method.
setMethod("sequence_length_temperature", signature(primer = "Primer_MSD"),
  function(primer, temp_func = calculate_tm_nnb, primer_min = 3, target_temp = 60, gc_filter = FALSE) {
    callNextMethod()
  }
)

# Primer_SPM: the temperature is calculated for the "extra" part followed by
# the binding sequence; only the part behind "extra" is stored as the new
# binding sequence.
setMethod("sequence_length_temperature", signature(primer = "Primer_SPM"),
  function(primer, temp_func = calculate_tm_nnb, primer_min = 3, target_temp = 60, gc_filter = FALSE) {
    extra_length <- nchar(primer@extra)
    best <- pick_binding_prefix(
      bases = s2c(paste(primer@extra, primer@binding_sequence, sep = "")),
      # Candidates are never shorter than the "extra" part.
      first_length = max(primer_min * 3, extra_length),
      temp_func = temp_func,
      target_temp = target_temp,
      gc_filter = gc_filter
    )
    primer@binding_sequence <- str_sub(best$sequence, extra_length + 1)
    primer@temperature <- best$temperature
    primer@difference <- best$difference
    return(primer)
  }
)

sequence_check<-function(input_sequence){
input_sequence<-str_to_upper(input_sequence)
input_sequence<-str_trim(input_sequence)
if(nchar(input_sequence)%%3!=0) {
stop(paste("The length of the sequence is no factor of 3. Please check your sequence.", "The length of the sequence was:", nchar(input_sequence), sep=" "))
}

if(str_detect(input_sequence, "^(A|C|G|T)+$") == F) {
stop(paste("The sequence contains invalid characters that are not A|C|G|T."))
}

codon_seq<-splitseq(s2c(input_sequence))
met<-which(str_detect(codon_seq, "ATG"))
if(length(met) == 0) {
Expand All @@ -217,15 +285,29 @@ sequence_check<-function(input_sequence){
return(codon_seq)
}

check_primer_dupplicates<-function(primers, fragments, binding_min_length=4, target_temp=60) {
check_primer_overhangs<-function(primers, fragments, binding_min_length=4, target_temp=60, check_repetitive=T) {
#ToDo: Add parameter for temperature calculation method
overhangs<-sapply(primers, function(x){return(c(x[[1]]@overhang, x[[2]]@overhang))})
duplicates<-table(overhangs)
duplicates<-duplicates[names(duplicates)!="" & duplicates > 1]
if(length(duplicates)==0) {
if(check_repetitive == T) {
#Repetitive overhangs
rep<-table(overhangs)
rep<-rep[names(rep)!=""]
rep_temp<-names(rep)
rep<-str_count(names(rep), ("(^(A|T){4}$)|(^(G|C){4}$)"))
names(rep)<-rep_temp
rep<-rep[rep > 0]
rm(rep_temp)
bad_overhangs<-union(names(duplicates), names(rep))
} else {
bad_overhangs<-duplicates
}
if(length(bad_overhangs)==0) {
return(primers)
}
duplicate<-duplicates[1]
primer_num<-which(overhangs==names(duplicate))
bad_overhang<-bad_overhangs[1]
primer_num<-which(overhangs==bad_overhang)
primer_unlist<-unlist(primers)
fragment_num<-ceiling(primer_num)/2
primer_num2<-primer_num %% 2
Expand All @@ -245,7 +327,7 @@ check_primer_dupplicates<-function(primers, fragments, binding_min_length=4, tar
#check if primer_rv is an NDT primer
if(class(primer_rv)=="Primer_MSD") {
msd_mut<-sapply(c("NNN", "NNK", "NNS", "NDT", "DBK", "NRT"), FUN = function(x){paste(stringr::str_to_upper(rev(seqinr::comp(seqinr::s2c(x), , ambiguous=T))), sep="", collapse="")}, USE.NAMES = F)
if(str_sub(primer_rv@NDT, 1, 3) %in% msd_mut) {
if(str_sub(primer_rv@extra, 1, 3) %in% msd_mut) {
if(i == length(primer_num)) {
stop(paste("We can not fix overlaps in the primers. Please consider a silent mutation at position", fragments[[ceiling((primer_rv_num+1)/2)]]@start))
}
Expand All @@ -254,10 +336,12 @@ check_primer_dupplicates<-function(primers, fragments, binding_min_length=4, tar
}
}
else{
shift_base<-str_sub(primer_rv@NDT, 1, 1)
shift_base<-str_sub(primer_rv@extra, 1, 1)
primer_rv@overhang<-paste(primer_rv@overhang,shift_base, sep="")
primer_rv@NDT<-str_sub(primer_rv@NDT, 2)
primer_rv@extra<-str_sub(primer_rv@extra, 2)
primer_rv@overhang<-str_sub(primer_rv@overhang, 2)
primer_rv@temperature<-calculate_tm_nnb(primer_rv@binding_sequence, offset = 0)
primer_rv@difference<-abs(primer_rv@temperature - primer_unlist[[primer_rv_num -1 ]]@temperature)
}
} else {
if(nchar(primer_rv@binding_sequence) < 3 * binding_min_length) {
Expand All @@ -270,31 +354,41 @@ check_primer_dupplicates<-function(primers, fragments, binding_min_length=4, tar
}
else{
shift_base<-str_sub(primer_rv@binding_sequence, 1, 1)
primer_rv@overhang<-paste(primer_rv@overhang,shift_base, sep="")
primer_rv@extra<-paste(primer_rv@extra, shift_base, sep="")
primer_rv@binding_sequence<-str_sub(primer_rv@binding_sequence, 2)
shift_base<-str_sub(primer_rv@extra, 1, 1)
primer_rv@extra<-str_sub(primer_rv@extra, 2)
primer_rv@overhang<-paste(primer_rv@overhang,shift_base, sep="")
primer_rv@overhang<-str_sub(primer_rv@overhang, 2)
primer_rv@temperature<-calculate_tm_nnb(primer_rv@binding_sequence)
primer_rv@temperature<-calculate_tm_nnb(primer_rv@binding_sequence, offset = 0)
primer_rv@difference<-abs(primer_rv@temperature - primer_unlist[[primer_rv_num -1 ]]@temperature)
}
}
if(class(primer_fd)=="Primer_MSD") {
primer_fd@overhang<-paste(comp(shift_base, forceToLower = F), primer_fd@overhang, sep="")
primer_fd@NDT<-paste(str_sub(primer_fd@overhang, 5), primer_fd@NDT ,sep="")
primer_fd@overhang<-str_sub(primer_fd@overhang, 1, 4)
primers[[ceiling(primer_fd_num/2)]][[1]]<-primer_fd
primers[[ceiling(primer_rv_num/2)]][[2]]<-primer_rv
break
}
else{
#if(class(primer_fd)=="Primer_MSD") {
primer_fd@overhang<-paste(comp(shift_base, forceToLower = F), primer_fd@overhang, sep="")
primer_fd@binding_sequence<-paste(str_sub(primer_fd@overhang, 5), primer_fd@binding_sequence ,sep="")
primer_fd@extra<-paste(str_sub(primer_fd@overhang, 5), primer_fd@extra ,sep="")
if(class(primer_fd)=="Primer_SPM") {
primer_fd@binding_sequence<-paste(str_sub(primer_fd@extra, -1), primer_fd@binding_sequence ,sep="")
primer_fd@extra<-str_sub(primer_fd@extra, 1, -2)
}
primer_fd@overhang<-str_sub(primer_fd@overhang, 1, 4)
primer_fv@temperature<-calculate_tm_nnb(primer_fv@binding_sequence)
primer_fv@difference<-abs(target_temp - primer_fd@temperature)
primer_fd@temperature<-calculate_tm_nnb(primer_fd@binding_sequence, offset = 0)
primer_fd@difference<-abs(target_temp - primer_fd@temperature)
primers[[ceiling(primer_fd_num/2)]][[1]]<-primer_fd
primers[[ceiling(primer_rv_num/2)]][[2]]<-primer_rv
break
}
#}
#else{
# primer_fd@overhang<-paste(comp(shift_base, forceToLower = F), primer_fd@overhang, sep="")
# primer_fd@binding_sequence<-paste(str_sub(primer_fd@overhang, 5), primer_fd@binding_sequence ,sep="")
# primer_fd@overhang<-str_sub(primer_fd@overhang, 1, 4)
# primer_fd@temperature<-calculate_tm_nnb(primer_fd@binding_sequence)
# primer_fd@difference<-abs(target_temp - primer_fd@temperature)
# primer_rv@difference<-abs(primer_fd@temperature - primer_rv@temperature)
# primers[[ceiling(primer_fd_num/2)]][[1]]<-primer_fd
# primers[[ceiling(primer_rv_num/2)]][[2]]<-primer_rv
# break
#}
}

#overhangs<-sapply(primers, function(x){return(c(x[[1]]@overhang, x[[2]]@overhang))})
Expand All @@ -304,9 +398,6 @@ check_primer_dupplicates<-function(primers, fragments, binding_min_length=4, tar
# return(primers)
#}
#else{
return(check_primer_dupplicates(primers = primers, fragments = fragments, binding_min_length = binding_min_length, target_temp = target_temp))
return(check_primer_overhangs(primers = primers, fragments = fragments, binding_min_length = binding_min_length, target_temp = target_temp))
#}
}



}
Binary file modified data/MSD_BsaI_result_lv2.RData
Binary file not shown.
Binary file modified data/Point_Mutagenesis_BbsI_result.RData
Binary file not shown.
40 changes: 40 additions & 0 deletions inst/doc/MSD.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
## ----setup, include=FALSE------------------------------------------------
# Chunk options for the vignette: echo the code and tidy it to 60 columns.
knitr::opts_chunk$set(
  echo = TRUE,
  tidy.opts = list(width.cutoff = 60),
  tidy = TRUE
)

## ------------------------------------------------------------------------
library(GoldenMutagenesis)

## ------------------------------------------------------------------------
# Input sequence, the two recognition sites used below and the codon usage file.
input_sequence <- "ATGTCTCAGGTTCAGAGTGGCATTTTGCCAGAACATTGCCGCGCGGCGATTTGGATCGAAGCCAACGTGAAAGGGGAAGTTGACGCCCTGCGTGCGGCCAGTAAAACATTTGCCGACAAACTGGCAACTTTTGAAGCGAAATTCCCGGACGCGCATCTTGGTGCGGTGGTTGCCTTTGGTAACAACACCTGGCGCGCTCTGAGCGGCGGCGTTGGGGCAGAAGAGCTGAAAGATTTTCCGGGCTACGGTAAAGGCCTTGCGCCGACGACCCAGTTCGATGTGTTGATCCACATTCTTTCTCTGCGTCACGACGTAAACTTCTCTGTCGCCCAGGCGGCGATGGAAGCCTTTGGTGACTGCATTGAAGTGAAAGAAGAGATCCACGGCTTCCGTTGGGTTGAAGAGCGTGACCTGAGCGGCTTTGTTGACGGTACGGAAAACCCGGCGGGTGAAGAGACGCGTCGCGAAGTGGCGGTTATCAAAGACGGCGTGGATGCGGGCGGCAGCTATGTGTTTGTCCAGCGTTGGGAACACAACCTGAAGCAGCTCAACCGGATGAGCGTTCACGATCAGGAGATGGTGATCGGGCGCACCAAAGAGGCCAACGAAGAGATCGACGGCGACGAACGTCCGGAAACCTCTCACCTCACCCGCGTTGATCTGAAAGAAGATGGCAAAGGGCTGAAGATTGTTCGCCAGAGCCTGCCGTACGGCACTGCCAGTGGCACTCACGGTCTGTACTTCTGCGCCTACTGCGCGCGTCTGCATAACATTGAGCAGCAACTGCTGAGCATGTTTGGCGATACCGATGGTAAGCGTGATGCGATGTTGCGTTTCACCAAACCGGTAACCGGCGGCTATTATTTCGCACCGTCGCTGGACAAGTTGATGGCGCTGTAA"
recognition_site_bbsi <- "GAAGAC"
recognition_site_bsai <- "GGTCTC"
cuf <- "e_coli_316407.csv"

## ------------------------------------------------------------------------
# Run domesticate() once per recognition site and show each result.
mutations_bbsi <- domesticate(input_sequence, recognition_site_bbsi, cuf = cuf)
mutations_bbsi
mutations_bsai <- domesticate(input_sequence, recognition_site_bsai, cuf = cuf)
mutations_bsai

## ------------------------------------------------------------------------
# If domestication is necessary follow the workflow of the Point Mutagenesis vignette
mutations <- c(137, 143, 147, 232, 234)
primers <- msd_mutate(
  input_sequence,
  prefix = "TT",
  restriction_enzyme = recognition_site_bsai,
  suffix = "A",
  vector = c("AATG", "AAGC"),
  replacements = mutations,
  replacement_range = 5,
  binding_min_length = 4,
  primer_length = 9,
  target_temp = 60,
  fragment_min_size = 60
)
primers

## ------------------------------------------------------------------------
# Add a further level to the primers, this time with the BbsI recognition site.
primers_lvl0 <- primer_add_level(
  primers,
  prefix = "TT",
  restriction_enzyme = recognition_site_bbsi,
  suffix = "AA",
  vector = c("CTCA", "CTCG")
)
primers_lvl0

## ------------------------------------------------------------------------
print_primer(primers_lvl0)

## ----eval=FALSE----------------------------------------------------------
# Not evaluated: writes the printed primers to "primers.txt".
# sink("primers.txt", append=FALSE, split=FALSE)
# print_primer(primers_lvl0)
# sink()

Loading

0 comments on commit 49f7633

Please sign in to comment.